Merge master
commit 61f2737e17
(mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-09-20 08:40:50 +00:00)

2  .github/PULL_REQUEST_TEMPLATE.md  (vendored)
@@ -1,7 +1,7 @@
 ### Changelog category (leave one):
 - New Feature
 - Improvement
-- Bug Fix (user-visible misbehaviour in official stable or prestable release)
+- Bug Fix (user-visible misbehavior in official stable or prestable release)
 - Performance Improvement
 - Backward Incompatible Change
 - Build/Testing/Packaging Improvement
48  .github/workflows/codeql.yml  (vendored)
@@ -1,48 +0,0 @@
-name: "CodeQL"
-
-"on":
-  schedule:
-    - cron: '0 0 * * *'
-  workflow_dispatch:
-
-env:
-  CC: clang-14
-  CXX: clang++-14
-
-jobs:
-  analyze:
-    name: Analyze
-    runs-on: ubuntu-latest
-    permissions:
-      actions: read
-      contents: read
-      security-events: write
-
-    strategy:
-      fail-fast: false
-      matrix:
-        language: ['cpp']
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v3
-        with:
-          submodules: 'true'
-
-      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v2
-        with:
-          languages: ${{ matrix.language }}
-
-      - name: Build
-        run: |
-          sudo apt-get install -yq ninja-build
-          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
-          mkdir build
-          cd build
-          cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
-          ninja
-          rm -rf ../contrib
-
-      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v2
43  benchmark/clickhouse/benchmark_cloud.sh  (new executable file)
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+QUERIES_FILE="queries.sql"
+TABLE=$1
+TRIES=3
+
+PARAMS="--host ... --secure --password ..."
+
+if [ -x ./clickhouse ]
+then
+    CLICKHOUSE_CLIENT="./clickhouse client"
+elif command -v clickhouse-client >/dev/null 2>&1
+then
+    CLICKHOUSE_CLIENT="clickhouse-client"
+else
+    echo "clickhouse-client is not found"
+    exit 1
+fi
+
+QUERY_ID_PREFIX="benchmark_$RANDOM"
+QUERY_NUM=1
+
+cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
+do
+    for i in $(seq 1 $TRIES)
+    do
+        QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}"
+        ${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query"
+        echo -n '.'
+    done
+    QUERY_NUM=$((QUERY_NUM + 1))
+    echo
+done
+
+sleep 10
+
+${CLICKHOUSE_CLIENT} ${PARAMS} --query "
+    WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
+    SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
+    FROM clusterAllReplicas(default, system.query_log)
+    WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
+    GROUP BY num ORDER BY num FORMAT TSV
+"
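A minimal way to exercise this script, assuming a `queries.sql` file with a `{table}` placeholder sits in the working directory and real connection details have been filled into `PARAMS` (the query and table name below are illustrative):

```bash
# Hypothetical smoke test for benchmark_cloud.sh: one query, three timed runs.
# Durations are then read back from system.query_log by the script itself.
echo "SELECT count() FROM {table}" > queries.sql
./benchmark_cloud.sh hits_100m_obfuscated
```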
@@ -364,10 +364,8 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS})
 add_definitions(-DARROW_WITH_ZLIB)
 SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})

-if (ARROW_WITH_ZSTD)
-    add_definitions(-DARROW_WITH_ZSTD)
-    SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
-endif ()
+add_definitions(-DARROW_WITH_ZSTD)
+SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})


 add_library(_arrow ${ARROW_SRCS})
@@ -383,7 +381,6 @@ target_link_libraries(_arrow PRIVATE
         ch_contrib::snappy
         ch_contrib::zlib
         ch_contrib::zstd
-        ch_contrib::zstd
 )
 target_link_libraries(_arrow PUBLIC _orc)

@@ -101,7 +101,12 @@ EOL

 function stop()
 {
-    clickhouse stop
+    clickhouse stop --do-not-kill && return
+    # We failed to stop the server with SIGTERM. Maybe it hung; let's collect stacktraces.
+    kill -TERM "$(pidof gdb)" ||:
+    sleep 5
+    gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||:
+    clickhouse stop --force
 }

 function start()
@@ -201,7 +206,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c
 start

 clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
-    || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+    || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
    && grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)

 [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
@@ -387,7 +392,7 @@ for table in query_log trace_log; do
 done

 # Write check result into check_status.tsv
-clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
+clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
 [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

 # Core dumps (see gcore)
@@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
 SKIPPED_SIGN = "[ SKIPPED "
 HUNG_SIGN = "Found hung queries in processlist"

-NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
+SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]

 RETRIES_SIGN = "Some tests were restarted"

@@ -25,14 +25,14 @@ def process_test_log(log_path):
     success = 0
     hung = False
     retries = False
-    task_timeout = True
+    success_finish = False
     test_results = []
     with open(log_path, "r") as test_file:
         for line in test_file:
             original_line = line
             line = line.strip()
-            if any(s in line for s in NO_TASK_TIMEOUT_SIGNS):
-                task_timeout = False
+            if any(s in line for s in SUCCESS_FINISH_SIGNS):
+                success_finish = True
             if HUNG_SIGN in line:
                 hung = True
             if RETRIES_SIGN in line:
@@ -81,7 +81,7 @@ def process_test_log(log_path):
         failed,
         success,
         hung,
-        task_timeout,
+        success_finish,
         retries,
         test_results,
     )
@@ -108,7 +108,7 @@ def process_result(result_path):
         failed,
         success,
         hung,
-        task_timeout,
+        success_finish,
         retries,
         test_results,
     ) = process_test_log(result_path)
@@ -123,10 +123,10 @@ def process_result(result_path):
         description = "Some queries hung, "
         state = "failure"
         test_results.append(("Some queries hung", "FAIL", "0", ""))
-    elif task_timeout:
-        description = "Timeout, "
+    elif not success_finish:
+        description = "Tests are not finished, "
         state = "failure"
-        test_results.append(("Timeout", "FAIL", "0", ""))
+        test_results.append(("Tests are not finished", "FAIL", "0", ""))
     elif retries:
         description = "Some tests restarted, "
         test_results.append(("Some tests restarted", "SKIPPED", "0", ""))
@@ -694,6 +694,49 @@ auto s = std::string{"Hello"};

 **2.** Exception specifiers from C++03 are not used.

+**3.** Prefer constructs that have convenient syntactic sugar in modern C++, e.g.
+
+```
+// Traditional way without syntactic sugar
+template <typename G, typename = std::enable_if_t<std::is_same<G, F>::value, void>> // SFINAE via std::enable_if, usage of ::value
+std::pair<int, int> func(const E<G> & e) // explicitly specified return type
+{
+    if (elements.count(e)) // .count() membership test
+    {
+        // ...
+    }
+
+    elements.erase(
+        std::remove_if(
+            elements.begin(), elements.end(),
+            [&](const auto x){
+                return x == 1;
+            }),
+        elements.end()); // remove-erase idiom
+
+    return std::make_pair(1, 2); // create pair via make_pair()
+}
+
+// With syntactic sugar (C++14/17/20)
+template <typename G>
+requires std::same_as<G, F> // SFINAE via C++20 concept
+auto func(const E<G> & e) // auto return type (C++14)
+{
+    if (elements.contains(e)) // C++20 .contains membership test
+    {
+        // ...
+    }
+
+    std::erase_if(
+        elements,
+        [&](const auto x){
+            return x == 1;
+        }); // C++20 std::erase_if
+
+    return {1, 2}; // or: return std::pair(1, 2); // create pair via initialization list or value initialization (C++17)
+}
+```

 ## Platform {#platform}

 **1.** We write code for a specific platform.
@@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
 # for hits_v1
 clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
 # for hits_100m_obfuscated
-clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
+clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"

 # import data
 cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
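After the import finishes, a quick sanity check (a sketch, assuming a local server and the `datasets` database created as above):

```bash
# Row count and date range should match the published hits_v1 dataset.
clickhouse-client --query "SELECT count() FROM datasets.hits_v1"
clickhouse-client --query "SELECT min(EventDate), max(EventDate) FROM datasets.hits_v1"
```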
@@ -9,66 +9,66 @@ ClickHouse can accept and return data in various formats. A format supported for
 results of a `SELECT`, and to perform `INSERT`s into a file-backed table.

 The supported formats are:
-| Input | Output |
-|--------|-------|-------|
+| Format | Input | Output |
+|--------|-------|--------|
 | [TabSeparated](#tabseparated) | ✔ | ✔ |
 | [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
 | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
 | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
 | [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
 | [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
 | [Template](#format-template) | ✔ | ✔ |
 | [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
 | [CSV](#csv) | ✔ | ✔ |
 | [CSVWithNames](#csvwithnames) | ✔ | ✔ |
 | [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
 | [CustomSeparated](#format-customseparated) | ✔ | ✔ |
 | [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
 | [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
 | [Values](#data-format-values) | ✔ | ✔ |
 | [Vertical](#vertical) | ✗ | ✔ |
 | [JSON](#json) | ✗ | ✔ |
 | [JSONAsString](#jsonasstring) | ✔ | ✗ |
 | [JSONStrings](#jsonstrings) | ✗ | ✔ |
 | [JSONCompact](#jsoncompact) | ✗ | ✔ |
 | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
 | [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
 | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
 | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
 | [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
 | [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
 | [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
 | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
 | [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
 | [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
 | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
 | [TSKV](#tskv) | ✔ | ✔ |
 | [Pretty](#pretty) | ✗ | ✔ |
 | [PrettyCompact](#prettycompact) | ✗ | ✔ |
 | [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
 | [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
 | [PrettySpace](#prettyspace) | ✗ | ✔ |
 | [Prometheus](#prometheus) | ✗ | ✔ |
 | [Protobuf](#protobuf) | ✔ | ✔ |
 | [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
 | [Avro](#data-format-avro) | ✔ | ✔ |
 | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
 | [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [Arrow](#data-format-arrow) | ✔ | ✔ |
 | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
 | [ORC](#data-format-orc) | ✔ | ✔ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
 | [Null](#null) | ✗ | ✔ |
 | [XML](#xml) | ✗ | ✔ |
 | [CapnProto](#capnproto) | ✔ | ✔ |
 | [LineAsString](#lineasstring) | ✔ | ✗ |
 | [Regexp](#data-format-regexp) | ✔ | ✗ |
 | [RawBLOB](#rawblob) | ✔ | ✔ |
 | [MsgPack](#msgpack) | ✔ | ✔ |
 | [MySQLDump](#mysqldump) | ✔ | ✗ |

 You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
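A format from the table is selected with the `FORMAT` clause (or the client's `--format` option); a quick illustration against a local server assumed to be running on default ports:

```bash
# The same row rendered through three of the formats listed above.
clickhouse-client --query "SELECT 1 AS x, 'a' AS s FORMAT JSONEachRow"
clickhouse-client --query "SELECT 1 AS x, 'a' AS s FORMAT CSVWithNames"
clickhouse-client --query "SELECT 1 AS x, 'a' AS s FORMAT Vertical"
```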
@@ -426,7 +426,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:

 - `status` — use with `static` type, response status code.

-- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
+- `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).

 - `response_content` — use with `static` type, the response content sent to the client; with the prefix ‘file://’ or ‘config://’, the content is taken from a file or from the configuration and sent to the client.
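To see `content_type` take effect, a handler rule can be probed with curl; a sketch that assumes a hypothetical `static` rule configured at `/hi` with `<content_type>application/json</content_type>` on a default server:

```bash
# The response headers should include the configured Content-Type.
curl -v 'http://localhost:8123/hi' 2>&1 | grep -i 'content-type'
```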
57  docs/en/interfaces/postgresql.md  (new file)
@@ -0,0 +1,57 @@
+---
+sidebar_position: 20
+sidebar_label: PostgreSQL Interface
+---
+
+# PostgreSQL Interface
+
+ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance, allowing you to connect a PostgreSQL client application that does not already support ClickHouse directly (for example, Amazon Redshift) to ClickHouse.
+
+To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
+
+```xml
+<clickhouse>
+    <postgresql_port>9005</postgresql_port>
+</clickhouse>
+```
+
+Start your ClickHouse server and look for a log message similar to the following that mentions **Listening for PostgreSQL compatibility protocol**:
+
+```response
+{} <Information> Application: Listening for PostgreSQL compatibility protocol: 127.0.0.1:9005
+```
+
+## Connect psql to ClickHouse
+
+The following command demonstrates how to connect the PostgreSQL client `psql` to ClickHouse:
+
+```bash
+psql -p [port] -h [hostname] -U [username] [database_name]
+```
+
+For example:
+
+```bash
+psql -p 9005 -h 127.0.0.1 -U alice default
+```
+
+:::note
+The `psql` client requires a login with a password, so you will not be able to connect using the `default` user with no password. Either assign a password to the `default` user, or log in as a different user.
+:::
+
+The `psql` client prompts for the password:
+
+```response
+Password for user alice:
+psql (14.2, server 22.3.1.1)
+WARNING: psql major version 14, server major version 22.
+         Some psql features might not work.
+Type "help" for help.
+
+default=>
+```
+
+And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse.
+
+[Original article](https://clickhouse.com/docs/en/interfaces/postgresql)
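A non-interactive variant of the connection above (assuming the same port and a user `alice` with a password set; the password is supplied via the standard `PGPASSWORD` environment variable):

```bash
# Run a single query over the PostgreSQL wire protocol and exit.
PGPASSWORD=... psql -p 9005 -h 127.0.0.1 -U alice default -c "SELECT version()"
```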
@@ -1,48 +0,0 @@
----
-sidebar_position: 108
----
-
-# groupArraySorted {#groupArraySorted}
-
-Returns an array with the first N items in ascending order.
-
-``` sql
-groupArraySorted(N)(column)
-```
-
-**Arguments**
-
-- `N` – The number of elements to return.
-
-    If the parameter is omitted, default value 10 is used.
-
-**Arguments**
-
-- `column` – The value.
-- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
-
-**Example**
-
-Gets the first 10 numbers:
-
-``` sql
-SELECT groupArraySorted(10)(number) FROM numbers(100)
-```
-
-``` text
-┌─groupArraySorted(10)(number)─┐
-│ [0,1,2,3,4,5,6,7,8,9]        │
-└──────────────────────────────┘
-```
-
-Or the last 10:
-
-``` sql
-SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
-```
-
-``` text
-┌─groupArraySorted(10)(number, negate(number))─┐
-│ [99,98,97,96,95,94,93,92,91,90]              │
-└──────────────────────────────────────────────┘
-```
@@ -620,9 +620,9 @@ arraySlice(array, offset[, length])

 **Arguments**

 - `array` – Array of data.
 - `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
-- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
+- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.

 **Example**
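A quick check of the corrected `length` semantics (a sketch against a default local server):

```bash
# Negative length drops that many items from the end of the slice:
# start at item 2 and stop one item before the end, giving [2,3,4].
clickhouse-client --query "SELECT arraySlice([1, 2, 3, 4, 5], 2, -1)"
```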
@@ -130,13 +130,9 @@ bitSlice(s, offset[, length])

 **Arguments**

-- `s` — s is [String](../../sql-reference/data-types/string.md)
-  or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
-  indent on the right. Numbering of the bits begins with 1.
-- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
-  offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
-  If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
+- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `offset` — The start index in bits. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1.
+- `length` — The length of the substring in bits. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds `s`, it is truncated. If length is not a multiple of 8, zeros are filled on the right.

 **Returned value**
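For illustration, a couple of `bitSlice` calls matching the description above (a sketch, assuming a recent local server):

```bash
# Take 8 bits starting at bit 9 of the two-byte string 'ab': yields 'b'.
clickhouse-client --query "SELECT bitSlice('ab', 9, 8)"
# A 4-bit slice is zero-padded on the right up to a full byte.
clickhouse-client --query "SELECT bin(bitSlice('a', 1, 4))"
```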
@@ -478,3 +478,17 @@ Result:
 │                                            0 │
 └──────────────────────────────────────────────┘
 ```
+
+Query:
+
+``` sql
+SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
+```
+
+Result:
+
+``` text
+┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
+│                                                                   0 │
+└────────────────────────────────────────────────────────────────────┘
+```
@@ -480,7 +480,7 @@ Result:

 ## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}

-Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). The ‘offset’ and ‘length’ arguments must be constants.
+Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL).

 ## substringUTF8(s, offset, length) {#substringutf8}
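The removed sentence reflects that `offset` and `length` no longer need to be constants; for example (a sketch against a default local server):

```bash
# Per-row offsets: a sliding 3-byte window over the same string.
clickhouse-client --query "SELECT substring('clickhouse', number + 1, 3) FROM numbers(3)"
# Rows: 'cli', 'lic', 'ick'
```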
@@ -410,7 +410,7 @@ $ curl -v 'http://localhost:8123/predefined_query'

 - `status` — used with the `static` type; returns the response status code.

-- `content_type` — used with the `static` type; returns the response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
+- `content_type` — used with any type; returns the response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).

 - `response_content` — used with the `static` type; the response content sent to the client. When using the ‘file://’ or ‘config://’ prefix, the content is taken from the file or configuration and sent to the client.
@@ -31,7 +31,5 @@ sidebar_label: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
 - `UInt16` — \[0 : 65535\]
 - `UInt32` — \[0 : 4294967295\]
 - `UInt64` — \[0 : 18446744073709551615\]
+- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
+- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]

-`UInt128` is not implemented yet.
-
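The new extremes can be checked directly (a sketch, assuming a recent local server):

```bash
# Largest representable UInt128 and UInt256 values.
clickhouse-client --query "SELECT CAST('340282366920938463463374607431768211455', 'UInt128')"
clickhouse-client --query "SELECT CAST('115792089237316195423570985008687907853269984665640564039457584007913129639935', 'UInt256')"
```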
@@ -21,7 +21,7 @@ LowCardinality(data_type)

 `LowCardinality` is a superstructure that changes the way data is stored and processed. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality` columns. Operating on dictionary-encoded data can significantly increase the performance of [SELECT](../statements/select/index.md) queries for many applications.

-The efficiency of the `LowCarditality` data type depends on the diversity of the data. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of reading and storing data. If a dictionary contains more than 100,000 distinct values, ClickHouse can perform worse than with ordinary data types.
+The efficiency of the `LowCardinality` data type depends on the diversity of the data. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of reading and storing data. If a dictionary contains more than 100,000 distinct values, ClickHouse can perform worse than with ordinary data types.

 When working with strings, using `LowCardinality` instead of [Enum](enum.md) gives more flexibility in use and often reveals the same or higher efficiency.
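A minimal illustration of the type in a table definition (a sketch; the table and column names are made up):

```bash
# Dictionary-encode a low-diversity string column.
clickhouse-client --query "
    CREATE TABLE lc_demo (id UInt64, status LowCardinality(String))
    ENGINE = MergeTree ORDER BY id"
clickhouse-client --query "INSERT INTO lc_demo SELECT number, ['ok', 'error'][number % 2 + 1] FROM numbers(1000)"
clickhouse-client --query "SELECT status, count() FROM lc_demo GROUP BY status"
```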
@@ -574,9 +574,9 @@ arraySlice(array, offset[, length])

 **Arguments**

 - `array` – The array of data.
 - `offset` – Indent from the edge of the array. A positive value means an indent on the left, a negative value an indent on the right. Array items are numbered starting from 1.
-- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
+- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.

 **Example**
@@ -446,3 +446,17 @@ SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16');
 │                                            0 │
 └──────────────────────────────────────────────┘
 ```
+
+Query:
+
+``` sql
+SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
+```
+
+Result:
+
+``` text
+┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
+│                                                                   0 │
+└────────────────────────────────────────────────────────────────────┘
+```
@@ -157,6 +157,7 @@ def build(args):

     if not args.skip_website:
         website.process_benchmark_results(args)
+        website.minify_website(args)
         redirects.build_static_redirects(args)

@@ -1,7 +1,10 @@
+import hashlib
 import json
+import logging
 import os
 import shutil
+import subprocess

 import bs4

 import util
@@ -178,6 +181,59 @@ def build_website(args):
             f.write(content.encode("utf-8"))


+def get_css_in(args):
+    return [
+        f"'{args.website_dir}/css/bootstrap.css'",
+        f"'{args.website_dir}/css/docsearch.css'",
+        f"'{args.website_dir}/css/base.css'",
+        f"'{args.website_dir}/css/blog.css'",
+        f"'{args.website_dir}/css/docs.css'",
+        f"'{args.website_dir}/css/highlight.css'",
+        f"'{args.website_dir}/css/main.css'",
+    ]
+
+
+def get_js_in(args):
+    return [
+        f"'{args.website_dir}/js/jquery.js'",
+        f"'{args.website_dir}/js/popper.js'",
+        f"'{args.website_dir}/js/bootstrap.js'",
+        f"'{args.website_dir}/js/sentry.js'",
+        f"'{args.website_dir}/js/base.js'",
+        f"'{args.website_dir}/js/index.js'",
+        f"'{args.website_dir}/js/docsearch.js'",
+        f"'{args.website_dir}/js/docs.js'",
+        f"'{args.website_dir}/js/main.js'",
+    ]
+
+
+def minify_website(args):
+    css_in = " ".join(get_css_in(args))
+    css_out = f"{args.output_dir}/docs/css/base.css"
+    os.makedirs(f"{args.output_dir}/docs/css")
+
+    command = f"cat {css_in}"
+    output = subprocess.check_output(command, shell=True)
+    with open(css_out, "wb+") as f:
+        f.write(output)
+
+    with open(css_out, "rb") as f:
+        css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
+
+    js_in = " ".join(get_js_in(args))
+    js_out = f"{args.output_dir}/docs/js/base.js"
+    os.makedirs(f"{args.output_dir}/docs/js")
+
+    command = f"cat {js_in}"
+    output = subprocess.check_output(command, shell=True)
+    with open(js_out, "wb+") as f:
+        f.write(output)
+
+    with open(js_out, "rb") as f:
+        js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
+    logging.info(js_digest)
+
+
 def process_benchmark_results(args):
     benchmark_root = os.path.join(args.website_dir, "benchmark")
     required_keys = {
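The shell equivalent of what `minify_website` does (despite its name it only concatenates and fingerprints the bundles); a sketch with hypothetical paths, assuming OpenSSL 1.1.1+ for `-sha3-224`:

```bash
# Concatenate the CSS bundle and take the first 8 hex chars of its
# SHA3-224 digest, mirroring hashlib.sha3_224(...).hexdigest()[0:8].
cat website/css/*.css > base.css
openssl dgst -sha3-224 base.css | awk '{print substr($NF, 1, 8)}'
```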
@@ -397,9 +397,9 @@ SELECT arrayPushFront(['b'], 'a') AS res

 **Parameters**

 - `array` – The array.
 - `offset` – The offset into the array. A positive value means an offset from the left, a negative value an indent from the right. Array indexes start from 1.
-- `length` - The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
+- `length` - The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.

 **Example**
@@ -4,6 +4,7 @@
 #include <iostream>
 #include <iomanip>
 #include <optional>
+#include <string_view>
 #include <Common/scope_guard_safe.h>
 #include <boost/program_options.hpp>
 #include <boost/algorithm/string/replace.hpp>
@@ -48,6 +49,7 @@
 #endif

 namespace fs = std::filesystem;
+using namespace std::literals;


 namespace DB
@@ -1038,6 +1040,158 @@ void Client::processConfig()
     client_info.quota_key = config().getString("quota_key", "");
 }


+void Client::readArguments(
+    int argc,
+    char ** argv,
+    Arguments & common_arguments,
+    std::vector<Arguments> & external_tables_arguments,
+    std::vector<Arguments> & hosts_and_ports_arguments)
+{
+    /** We allow different groups of arguments:
+      * - common arguments;
+      * - arguments for any number of external tables each in form "--external args...",
+      *   where possible args are file, name, format, structure, types;
+      * - param arguments for prepared statements.
+      * Split these groups before processing.
+      */
+    bool in_external_group = false;
+
+    std::string prev_host_arg;
+    std::string prev_port_arg;
+
+    for (int arg_num = 1; arg_num < argc; ++arg_num)
+    {
+        std::string_view arg = argv[arg_num];
+
+        if (arg == "--external")
+        {
+            in_external_group = true;
+            external_tables_arguments.emplace_back(Arguments{""});
+        }
+        /// Options with value after equal sign.
+        else if (
+            in_external_group
+            && (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
+                || arg.starts_with("--types=")))
+        {
+            external_tables_arguments.back().emplace_back(arg);
+        }
+        /// Options with value after whitespace.
+        else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
+        {
+            if (arg_num + 1 < argc)
+            {
+                external_tables_arguments.back().emplace_back(arg);
+                ++arg_num;
+                arg = argv[arg_num];
+                external_tables_arguments.back().emplace_back(arg);
+            }
+            else
+                break;
+        }
+        else
+        {
+            in_external_group = false;
+            if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
+
+            /// Parameter arg after underline.
+            if (arg.starts_with("--param_"))
+            {
+                auto param_continuation = arg.substr(strlen("--param_"));
+                auto equal_pos = param_continuation.find_first_of('=');
+
+                if (equal_pos == std::string::npos)
+                {
+                    /// param_name value
+                    ++arg_num;
+                    if (arg_num >= argc)
+                        throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
+                    arg = argv[arg_num];
+                    query_parameters.emplace(String(param_continuation), String(arg));
+                }
+                else
+                {
+                    if (equal_pos == 0)
+                        throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
+
+                    /// param_name=value
+                    query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
+                }
+            }
+            else if (arg.starts_with("--host") || arg.starts_with("-h"))
+            {
+                std::string host_arg;
+                /// --host host
+                if (arg == "--host" || arg == "-h")
+                {
+                    ++arg_num;
+                    if (arg_num >= argc)
+                        throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
+                    arg = argv[arg_num];
+                    host_arg = "--host=";
+                    host_arg.append(arg);
+                }
+                else
+                    host_arg = arg;
+
+                /// --port port1 --host host1
+                if (!prev_port_arg.empty())
+                {
+                    hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
+                    prev_port_arg.clear();
+                }
+                else
+                {
+                    /// --host host1 --host host2
+                    if (!prev_host_arg.empty())
+                        hosts_and_ports_arguments.push_back({prev_host_arg});
+
+                    prev_host_arg = host_arg;
+                }
+            }
+            else if (arg.starts_with("--port"))
+            {
+                auto port_arg = String{arg};
+                /// --port port
+                if (arg == "--port")
+                {
+                    port_arg.push_back('=');
+                    ++arg_num;
+                    if (arg_num >= argc)
+                        throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
+                    arg = argv[arg_num];
+                    port_arg.append(arg);
+                }
+
+                /// --host host1 --port port1
+                if (!prev_host_arg.empty())
+                {
+                    hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
+                    prev_host_arg.clear();
+                }
+                else
+                {
+                    /// --port port1 --port port2
+                    if (!prev_port_arg.empty())
+                        hosts_and_ports_arguments.push_back({prev_port_arg});
+
+                    prev_port_arg = port_arg;
+                }
+            }
+            else if (arg == "--allow_repeated_settings")
+                allow_repeated_settings = true;
+            else
+                common_arguments.emplace_back(arg);
+        }
+    }
+    if (!prev_host_arg.empty())
+        hosts_and_ports_arguments.push_back({prev_host_arg});
+    if (!prev_port_arg.empty())
+        hosts_and_ports_arguments.push_back({prev_port_arg});
+}
+
 }

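The net effect of the parsing above is that hosts and ports can be paired on the command line; some hypothetical invocations that the code groups into `hosts_and_ports_arguments` (the host names are made up, and the multiple endpoints are presumably tried in turn):

```bash
# Two host/port pairs.
clickhouse-client --host host1 --port 9000 --host host2 --port 9001 --query "SELECT 1"
# Equal-sign forms work too; a bare host keeps the default port.
clickhouse-client --host=host1 --host=host2 --query "SELECT 1"
# External-table options must follow --external, as enforced above.
clickhouse-client --external --file data.tsv --name t --structure "x UInt64" --query "SELECT count() FROM t"
```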
@@ -36,6 +36,13 @@ protected:

     void processConfig() override;

+    void readArguments(
+        int argc,
+        char ** argv,
+        Arguments & common_arguments,
+        std::vector<Arguments> & external_tables_arguments,
+        std::vector<Arguments> & hosts_and_ports_arguments) override;
+
 private:
     void printChangedSettings() const;
     std::vector<String> loadWarningMessages();
@@ -68,6 +68,7 @@ namespace ErrorCodes
     extern const int NOT_ENOUGH_SPACE;
     extern const int NOT_IMPLEMENTED;
     extern const int CANNOT_KILL;
+    extern const int BAD_ARGUMENTS;
 }

 }
@@ -1062,8 +1063,11 @@ namespace
         return pid;
     }

-    int stop(const fs::path & pid_file, bool force)
+    int stop(const fs::path & pid_file, bool force, bool do_not_kill)
     {
+        if (force && do_not_kill)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible");
+
         UInt64 pid = isRunning(pid_file);

         if (!pid)
@@ -1092,9 +1096,15 @@ namespace

         if (try_num == num_tries)
         {
-            fmt::print("Will terminate forcefully.\n", pid);
+            if (do_not_kill)
+            {
+                fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid);
+                return 1;
+            }
+
+            fmt::print("Will terminate forcefully (pid = {}).\n", pid);
             if (0 == kill(pid, 9))
-                fmt::print("Sent kill signal.\n", pid);
+                fmt::print("Sent kill signal (pid = {}).\n", pid);
             else
                 throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR);

@@ -1175,6 +1185,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
         ("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
         ("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
         ("force", po::bool_switch(), "Stop with KILL signal instead of TERM")
+        ("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
     ;

     po::variables_map options;
@@ -1189,7 +1200,9 @@ int mainEntryClickHouseStop(int argc, char ** argv)
         fs::path prefix = options["prefix"].as<std::string>();
         fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";

-        return stop(pid_file, options["force"].as<bool>());
+        bool force = options["force"].as<bool>();
+        bool do_not_kill = options["do-not-kill"].as<bool>();
+        return stop(pid_file, force, do_not_kill);
    }
    catch (...)
    {
@@ -1247,6 +1260,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
         ("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
         ("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")
         ("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
+        ("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
     ;

     po::variables_map options;
@@ -1265,7 +1279,9 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
         fs::path config = prefix / options["config-path"].as<std::string>() / "config.xml";
         fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";

-        if (int res = stop(pid_file, options["force"].as<bool>()))
+        bool force = options["force"].as<bool>();
+        bool do_not_kill = options["do-not-kill"].as<bool>();
+        if (int res = stop(pid_file, force, do_not_kill))
             return res;

         return start(user, executable, config, pid_file);
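Usage of the new flag (a sketch, on a host with a running server installed via the standard packages):

```bash
# Graceful-only stop: send TERM and wait, but never escalate to KILL.
sudo clickhouse stop --do-not-kill
# Per the code above, a process that survives TERM leaves exit code 1.
echo $?
# The two flags are rejected as incompatible (BAD_ARGUMENTS):
sudo clickhouse stop --force --do-not-kill
```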
@@ -738,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
         config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
 }

+void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
+{
+    for (int arg_num = 1; arg_num < argc; ++arg_num)
+    {
+        const char * arg = argv[arg_num];
+        common_arguments.emplace_back(arg);
+    }
+}
+
 }

 #pragma GCC diagnostic ignored "-Wunused-function"
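Since `clickhouse-local` treats every argument as a common argument (no `--external` or host/port grouping), invocations stay flat; for example:

```bash
# All options land in common_arguments and are parsed downstream.
echo -e "1\n2\n3" | clickhouse-local --structure "x UInt8" \
    --input-format TSV --query "SELECT sum(x) FROM table"
```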
@@ -45,6 +45,8 @@ protected:
         const std::vector<Arguments> &, const std::vector<Arguments> &) override;

     void processConfig() override;
+    void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
+

     void updateLoggerLevel(const String & logs_level) override;
@@ -540,7 +540,7 @@ static void sanityChecks(Server & server)
     try
     {
         if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
-            server.context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
+            server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
     }
     catch (...)
     {
@@ -558,7 +558,7 @@ static void sanityChecks(Server & server)
     try
     {
         if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
-            server.context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
+            server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
     }
     catch (...)
     {
@@ -1088,11 +1088,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
     total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);

     auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
-    if (config->has("global_memory_usage_overcommit_max_wait_microseconds"))
-    {
-        UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0);
-        global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
-    }
+    UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 200);
+    global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
     total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);

     // FIXME logging-related things need synchronization -- see the 'Logger * log' saved
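The wait time now defaults to 200 microseconds even when the key is absent; to override it, the server config can set the key explicitly. A sketch of a hypothetical `config.d` override:

```bash
# Wait up to 5000 us for memory to be freed under global overcommit
# before failing a query (the value here is illustrative).
cat > /etc/clickhouse-server/config.d/overcommit.xml <<'EOF'
<clickhouse>
    <global_memory_usage_overcommit_max_wait_microseconds>5000</global_memory_usage_overcommit_max_wait_microseconds>
</clickhouse>
EOF
```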
@@ -1294,17 +1291,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
         LOG_INFO(log, "Listening for {}", server.getDescription());
     }

-    auto & access_control = global_context->getAccessControl();
-    if (config().has("custom_settings_prefixes"))
-        access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
-
-    access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
-    access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
-
     /// Initialize access storages.
+    auto & access_control = global_context->getAccessControl();
     try
     {
-        access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
+        access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
     }
     catch (...)
     {
@@ -545,6 +545,14 @@
         -->
     </user_directories>

+    <access_control_improvements>
+        <!-- Enables logic that users without permissive row policies can still read rows using a SELECT query.
+             For example, if there are two users A and B, and a row policy is defined only for A, then
+             if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
+             By default this setting is false for compatibility with earlier access configurations. -->
+        <users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
+    </access_control_improvements>
+
     <!-- Default profile of settings. -->
     <default_profile>default</default_profile>
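How the setting plays out, sketched with hypothetical users and a hypothetical table: with a row policy defined only for `user_a`, the flag decides what `user_b` sees:

```bash
# With users_without_row_policies_can_read_rows = false (the default),
# a user not named in any permissive policy on the table reads no rows.
clickhouse-client --query "
    CREATE ROW POLICY p ON mydb.t FOR SELECT USING region = 'eu' TO user_a"
clickhouse-client --user user_b --query "SELECT count() FROM mydb.t"   # 0 rows
```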
@@ -129,8 +129,8 @@

 #query_div
 {
-    /* Make enough space for even huge queries. */
-    height: 20%;
+    /* Make enough space for medium/large queries but allow the query textarea to grow. */
+    min-height: 20%;
 }

 #query
@@ -748,7 +748,7 @@
     const max_rows = 10000 / response.meta.length;
     let row_num = 0;

-    const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
+    const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
     const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
     const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
     const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);
@@ -6,9 +6,6 @@
     <profiles>
         <!-- Default settings. -->
        <default>
-            <!-- Maximum memory usage for processing single query, in bytes. -->
-            <max_memory_usage>10000000000</max_memory_usage>
-
             <!-- How to choose between replicas during distributed query processing.
                 random - choose random replica from set of replicas with minimum number of errors
                 nearest_hostname - from set of replicas with minimum number of errors, choose replica
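With the default-profile cap removed, a memory limit can still be applied per profile or per query; for example (a sketch, reproducing the removed 10 GB default for a single query):

```bash
clickhouse-client --max_memory_usage=10000000000 \
    --query "SELECT uniqExact(number) FROM numbers(100000000)"
```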
@@ -149,6 +149,24 @@ AccessControl::AccessControl()

 AccessControl::~AccessControl() = default;


+void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
+                                        const zkutil::GetZooKeeper & get_zookeeper_function_)
+{
+    if (config_.has("custom_settings_prefixes"))
+        setCustomSettingsPrefixes(config_.getString("custom_settings_prefixes"));
+
+    setNoPasswordAllowed(config_.getBool("allow_no_password", true));
+    setPlaintextPasswordAllowed(config_.getBool("allow_plaintext_password", true));
+
+    setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool(
+        "access_control_improvements.users_without_row_policies_can_read_rows",
+        false /* false because we need to be compatible with earlier access configurations */));
+
+    addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
+}
+
+
 void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
 {
     auto storages = getStoragesPtr();
@@ -170,11 +188,7 @@ void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguratio

 void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
 {
-    auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
-    auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
-    auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
-    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
-        is_no_password_allowed_function, is_plaintext_password_allowed_function);
+    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
     new_storage->setConfig(users_config_);
     addStorage(new_storage);
     LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
@@ -207,11 +221,7 @@ void AccessControl::addUsersConfigStorage(
             return;
         }
     }
-    auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
-    auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
-    auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
-    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
-        is_no_password_allowed_function, is_plaintext_password_allowed_function);
+    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
     new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
     addStorage(new_storage);
     LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
@ -50,6 +50,9 @@ public:
    AccessControl();
    ~AccessControl() override;

    void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
                             const zkutil::GetZooKeeper & get_zookeeper_function_);

    /// Parses access entities from a configuration loaded from users.xml.
    /// This function adds UsersConfigAccessStorage if it wasn't added before.
    void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
@ -122,6 +125,12 @@ public:
    void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
    bool isPlaintextPasswordAllowed() const;

    /// Enables logic that users without permissive row policies can still read rows using a SELECT query.
    /// For example, if there are two users A and B and a row policy is defined only for A, then
    /// if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
    void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
    bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }

    UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
    void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);

@ -178,6 +187,7 @@ private:
    std::unique_ptr<CustomSettingsPrefixes> custom_settings_prefixes;
    std::atomic_bool allow_plaintext_password = true;
    std::atomic_bool allow_no_password = true;
    std::atomic_bool users_without_row_policies_can_read_rows = false;
};

}
@ -28,17 +28,25 @@ namespace
            permissions.push_back(filter);
        }

        ASTPtr getResult() &&
        ASTPtr getResult(bool users_without_row_policies_can_read_rows) &&
        {
            /// Process permissive filters.
            restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
            if (!permissions.empty() || !users_without_row_policies_can_read_rows)
            {
                /// Process permissive filters.
                restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
            }

            /// Process restrictive filters.
            auto result = makeASTForLogicalAnd(std::move(restrictions));
            ASTPtr result;
            if (!restrictions.empty())
                result = makeASTForLogicalAnd(std::move(restrictions));

            bool value;
            if (tryGetLiteralBool(result.get(), value) && value)
                result = nullptr; /// The condition is always true, no need to check it.
            if (result)
            {
                bool value;
                if (tryGetLiteralBool(result.get(), value) && value)
                    result = nullptr; /// The condition is always true, no need to check it.
            }

            return result;
        }
@ -234,7 +242,7 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
    {
        auto & mixed_filter = (*mixed_filters)[key];
        mixed_filter.database_and_table_name = mixer.database_and_table_name;
        mixed_filter.ast = std::move(mixer.mixer).getResult();
        mixed_filter.ast = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
    }

    enabled.mixed_filters.store(mixed_filters);
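The mixing rule above is easier to see outside the AST machinery. A minimal standalone sketch (using strings in place of `ASTPtr`; all names are hypothetical) of how permissive filters are OR-ed, restrictive filters AND-ed, and how the new flag suppresses the implicit deny-all when no permissive policy exists:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Hypothetical stand-in for the AST mixer: permissive filters combine with OR,
// the result joins the restrictive filters, which combine with AND.
std::optional<std::string> mixFilters(
    std::vector<std::string> permissions,
    std::vector<std::string> restrictions,
    bool users_without_row_policies_can_read_rows)
{
    auto join = [](const std::vector<std::string> & parts, const std::string & op)
    {
        std::string res;
        for (const auto & p : parts)
            res += (res.empty() ? std::string() : " " + op + " ") + "(" + p + ")";
        return res;
    };

    /// Without the setting, an empty permissive set still contributes "0" (false),
    /// i.e. the user sees no rows. With the setting, it is simply skipped.
    if (!permissions.empty() || !users_without_row_policies_can_read_rows)
        restrictions.push_back(permissions.empty() ? "0" : join(permissions, "OR"));

    if (restrictions.empty())
        return std::nullopt; /// No filter at all: the user reads everything.
    return join(restrictions, "AND");
}

int main()
{
    auto r = mixFilters({}, {}, /* can_read_rows = */ true);
    std::cout << (r ? *r : "<no filter>") << '\n';                       // <no filter>
    std::cout << *mixFilters({}, {}, false) << '\n';                     // (0)  -> no rows
    std::cout << *mixFilters({"id < 10"}, {"region = 1"}, true) << '\n'; // (region = 1) AND ((id < 10))
}
```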
@ -3,6 +3,7 @@
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/SettingsProfile.h>
#include <Access/AccessControl.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
@ -339,7 +340,7 @@ namespace
    }


    std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config)
    std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows)
    {
        std::map<std::pair<String /* database */, String /* table */>, std::unordered_map<String /* user */, String /* filter */>> all_filters_map;

@ -395,8 +396,19 @@ namespace
            const auto & [database, table_name] = database_and_table_name;
            for (const String & user_name : user_names)
            {
                String filter;
                auto it = user_to_filters.find(user_name);
                String filter = (it != user_to_filters.end()) ? it->second : "1";
                if (it != user_to_filters.end())
                {
                    filter = it->second;
                }
                else
                {
                    if (users_without_row_policies_can_read_rows)
                        continue;
                    else
                        filter = "1";
                }

                auto policy = std::make_shared<RowPolicy>();
                policy->setFullName(user_name, database, table_name);
@ -411,7 +423,7 @@ namespace

    SettingsProfileElements parseSettingsConstraints(const Poco::Util::AbstractConfiguration & config,
                                                     const String & path_to_constraints,
                                                     Fn<void(std::string_view)> auto && check_setting_name_function)
                                                     const AccessControl & access_control)
    {
        SettingsProfileElements profile_elements;
        Poco::Util::AbstractConfiguration::Keys keys;
@ -419,8 +431,7 @@ namespace

        for (const String & setting_name : keys)
        {
            if (check_setting_name_function)
                check_setting_name_function(setting_name);
            access_control.checkSettingNameIsAllowed(setting_name);

            SettingsProfileElement profile_element;
            profile_element.setting_name = setting_name;
@ -448,7 +459,7 @@ namespace
    std::shared_ptr<SettingsProfile> parseSettingsProfile(
        const Poco::Util::AbstractConfiguration & config,
        const String & profile_name,
        Fn<void(std::string_view)> auto && check_setting_name_function)
        const AccessControl & access_control)
    {
        auto profile = std::make_shared<SettingsProfile>();
        profile->setName(profile_name);
@ -470,13 +481,12 @@ namespace

            if (key == "constraints" || key.starts_with("constraints["))
            {
                profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, check_setting_name_function));
                profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control));
                continue;
            }

            const auto & setting_name = key;
            if (check_setting_name_function)
                check_setting_name_function(setting_name);
            access_control.checkSettingNameIsAllowed(setting_name);

            SettingsProfileElement profile_element;
            profile_element.setting_name = setting_name;
@ -490,7 +500,7 @@ namespace

    std::vector<AccessEntityPtr> parseSettingsProfiles(
        const Poco::Util::AbstractConfiguration & config,
        Fn<void(std::string_view)> auto && check_setting_name_function)
        const AccessControl & access_control)
    {
        Poco::Util::AbstractConfiguration::Keys profile_names;
        config.keys("profiles", profile_names);
@ -502,7 +512,7 @@ namespace
        {
            try
            {
                profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
                profiles.push_back(parseSettingsProfile(config, profile_name, access_control));
            }
            catch (Exception & e)
            {
@ -515,13 +525,8 @@ namespace
    }
}

UsersConfigAccessStorage::UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
    : UsersConfigAccessStorage(STORAGE_TYPE, check_setting_name_function_, is_no_password_allowed_function_, is_plaintext_password_allowed_function_)
{
}

UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
    : IAccessStorage(storage_name_), check_setting_name_function(check_setting_name_function_), is_no_password_allowed_function(is_no_password_allowed_function_), is_plaintext_password_allowed_function(is_plaintext_password_allowed_function_)
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_)
    : IAccessStorage(storage_name_), access_control(access_control_)
{
}

@ -563,16 +568,16 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu
{
    try
    {
        bool no_password_allowed = is_no_password_allowed_function();
        bool plaintext_password_allowed = is_plaintext_password_allowed_function();
        bool no_password_allowed = access_control.isNoPasswordAllowed();
        bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed();
        std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
        for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed))
            all_entities.emplace_back(generateID(*entity), entity);
        for (const auto & entity : parseQuotas(config))
            all_entities.emplace_back(generateID(*entity), entity);
        for (const auto & entity : parseRowPolicies(config))
        for (const auto & entity : parseRowPolicies(config, access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()))
            all_entities.emplace_back(generateID(*entity), entity);
        for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
        for (const auto & entity : parseSettingsProfiles(config, access_control))
            all_entities.emplace_back(generateID(*entity), entity);
        memory_storage.setAll(all_entities);
    }
@ -12,6 +12,7 @@ namespace Poco::Util

namespace DB
{
class AccessControl;
class ConfigReloader;

/// Implementation of IAccessStorage which loads all from users.xml periodically.
@ -20,13 +21,8 @@ class UsersConfigAccessStorage : public IAccessStorage
public:

    static constexpr char STORAGE_TYPE[] = "users.xml";
    using CheckSettingNameFunction = std::function<void(const std::string_view &)>;
    using IsNoPasswordFunction = std::function<bool()>;
    using IsPlaintextPasswordFunction = std::function<bool()>;

    UsersConfigAccessStorage(const String & storage_name_ = STORAGE_TYPE, const CheckSettingNameFunction & check_setting_name_function_ = {}, const IsNoPasswordFunction & is_no_password_allowed_function_ = {}, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_ = {}); /// NOLINT
    UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_); /// NOLINT

    UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_);
    ~UsersConfigAccessStorage() override;

    const char * getStorageType() const override { return STORAGE_TYPE; }
@ -58,10 +54,8 @@ private:
    scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
    scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override;

    const AccessControl & access_control;
    MemoryAccessStorage memory_storage;
    CheckSettingNameFunction check_setting_name_function;
    IsNoPasswordFunction is_no_password_allowed_function;
    IsPlaintextPasswordFunction is_plaintext_password_allowed_function;
    String path;
    std::unique_ptr<ConfigReloader> config_reloader;
    mutable std::mutex load_mutex;
@ -1,147 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>


static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;


namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}


namespace
{
    template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
    class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
    {
        using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
    };

    template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
    class AggregateFunctionGroupArraySortedFieldType
        : public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
    {
        using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
            AggregateFunctionGroupArraySorted;
        DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
    };

    template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
    AggregateFunctionPtr
    createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
    {
        return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
    }

    template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
    AggregateFunctionPtr
    createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
    {
#define DISPATCH(A, C, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
        WhichDataType which(argument_types[0]);
        FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
        DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
        DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
        DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
        DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC

        if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        {
            return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
                threshold, argument_types, params));
        }
        else
        {
            return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
                threshold, argument_types, params));
        }
    }


    AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
        const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
    {
        UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;

        if (params.size() == 1)
        {
            UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

            if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
                throw Exception(
                    "Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
                    ErrorCodes::ARGUMENT_OUT_OF_BOUND);

            if (k == 0)
                throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);

            threshold = k;
        }
        else if (!params.empty())
        {
            throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        }

        if (argument_types.size() == 2)
        {
            if (isNumber(argument_types[1]))
            {
#define DISPATCH2(A, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
                WhichDataType which(argument_types[1]);
                FOR_NUMERIC_TYPES(DISPATCH)
                DISPATCH2(Enum8, Int8)
                DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
                throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }
            else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
            {
                return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
            }
            else
            {
                return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
            }
        }
        else if (argument_types.size() == 1)
        {
            return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
        }
        else
        {
            throw Exception(
                "Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        }
    }
}

void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
    factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}
@ -1,315 +0,0 @@
#pragma once

#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>

#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>

namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
    if constexpr (std::is_same_v<TColumn, StringRef>)
    {
        if constexpr (is_plain)
        {
            StringRef str = column->getDataAt(row);
            auto ptr = arena->alloc(str.size);
            std::copy(str.data, str.data + str.size, ptr);
            return StringRef(ptr, str.size);
        }
        else
        {
            const char * begin = nullptr;
            return column->serializeValueIntoArena(row, *arena, begin);
        }
    }
    else
    {
        if constexpr (std::is_same_v<TColumn, UInt64>)
            return column->getUInt(row);
        else
            return column->getInt(row);
    }
}

template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
    for (size_t i = 0; i < threshold; i++)
    {
        results[i] = 0;
    }

    threshold = std::min(row_end - row_begin, threshold);
    size_t current_max = 0;
    size_t cur;
    size_t z;
    for (size_t i = row_begin; i < row_end; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        /// Starting from the highest values, look for the first value lower than the given one.
        for (cur = current_max; cur > 0; cur--)
        {
            if (data[i] > data[results[cur - 1]])
                break;
        }

        if (cur < threshold)
        {
            /// Move all the higher values one position to the right.
            for (z = std::min(threshold - 1, current_max); z > cur; z--)
                results[z] = results[z - 1];

            if (current_max < threshold)
                ++current_max;

            /// Insert the element at the given position.
            results[cur] = i;
        }
    }

    return current_max;
}

template <typename T>
struct SortableItem
{
    T a;
    size_t b;
    bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};

template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
    const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
    std::vector<SortableItem<TColumn>> dataIndexed(row_end);
    size_t num_elements_filtered = 0;

    for (size_t i = row_begin; i < row_end; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        dataIndexed.data()[num_elements_filtered].a = data[i];
        dataIndexed.data()[num_elements_filtered].b = i;
        num_elements_filtered++;
    }

    threshold = std::min(num_elements_filtered, threshold);

    std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
    std::sort(dataIndexed.data(), dataIndexed.data() + threshold);

    for (size_t i = 0; i < threshold; i++)
    {
        results[i] = dataIndexed[i].b;
    }

    return threshold;
}

static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;

template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
    if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
    {
        if (filter != nullptr)
            return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results, filter);
        else
            return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results);
    }
    else
    {
        if (filter != nullptr)
            return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results, filter);
        else
            return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results);
    }
}

template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
                                              AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
                                              AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
    using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
    using Base = IAggregateFunctionDataHelper<
        AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
        AggregateFunctionGroupArraySorted>;

    UInt64 threshold;
    DataTypePtr & input_data_type;
    mutable std::mutex mutex;

    static void deserializeAndInsert(StringRef str, IColumn & data_to);

public:
    AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
        : IAggregateFunctionDataHelper<
            AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
            AggregateFunctionGroupArraySorted>(argument_types_, params)
        , threshold(threshold_)
        , input_data_type(this->argument_types[0])
    {
    }

    void create(AggregateDataPtr place) const override
    {
        Base::create(place);
        this->data(place).threshold = threshold;
    }

    String getName() const override { return "groupArraySorted"; }

    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }

    bool allocatesMemoryInArena() const override
    {
        if constexpr (std::is_same_v<TColumnA, StringRef>)
            return true;
        else
            return false;
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        State & data = this->data(place);
        if constexpr (use_column_b)
        {
            data.add(
                readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
        }
        else
        {
            data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
        }
    }

    template <typename TColumn, bool is_plain, typename TFunc>
    void
    forFirstRows(size_t row_begin, size_t row_end, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
    {
        const TColumn * values = nullptr;
        std::unique_ptr<std::vector<TColumn>> values_vector;
        std::vector<size_t> best_rows(threshold);

        if constexpr (std::is_same_v<TColumn, StringRef>)
        {
            values_vector.reset(new std::vector<TColumn>(row_end));
            for (size_t i = row_begin; i < row_end; i++)
                (*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
            values = (*values_vector).data();
        }
        else
        {
            const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
            values = column.getData().data();
        }

        const UInt8 * filter = nullptr;
        StringRef refFilter;

        if (if_argument_pos >= 0)
        {
            refFilter = columns[if_argument_pos]->getRawData();
            filter = reinterpret_cast<const UInt8 *>(refFilter.data);
        }

        size_t num_elements = getFirstNElements(values, row_begin, row_end, threshold, best_rows.data(), filter);
        for (size_t i = 0; i < num_elements; i++)
        {
            func(best_rows[i], values);
        }
    }

    void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr place,
        const IColumn ** columns,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        State & data = this->data(place);

        if constexpr (use_column_b)
        {
            forFirstRows<TColumnB, is_plain_b>(
                row_begin, row_end, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
                {
                    data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
                });
        }
        else
        {
            forFirstRows<TColumnA, is_plain_a>(
                row_begin, row_end, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
                {
                    data.add(values[row]);
                });
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
    }

    void
    deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).deserialize(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
    {
        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        auto & values = this->data(place).values;
        offsets_to.push_back(offsets_to.back() + values.size());

        IColumn & data_to = arr_to.getData();
        for (auto value : values)
        {
            if constexpr (std::is_same_v<TColumnA, StringRef>)
            {
                auto str = State::itemValue(value);
                if constexpr (is_plain_a)
                {
                    data_to.insertData(str.data, str.size);
                }
                else
                {
                    data_to.deserializeAndInsertFromArena(str.data);
                }
            }
            else
            {
                data_to.insert(State::itemValue(value));
            }
        }
    }
};
}
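The two selection paths in the deleted code trade off differently with the threshold: insertion into a small sorted prefix is cheap for small N, while nth_element plus a sort of only the prefix wins for large N. A minimal standalone sketch of the large-threshold strategy (hypothetical names, plain `int` data):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Indices of the `threshold` smallest values of `data`, in ascending value order.
// Mirrors the nth_element + sort approach used for large thresholds above.
std::vector<size_t> firstNIndices(const std::vector<int> & data, size_t threshold)
{
    threshold = std::min(threshold, data.size());

    std::vector<std::pair<int, size_t>> indexed(data.size());
    for (size_t i = 0; i < data.size(); ++i)
        indexed[i] = {data[i], i};

    // Partition so the `threshold` smallest pairs come first, then sort only that prefix.
    std::nth_element(indexed.begin(), indexed.begin() + threshold, indexed.end());
    std::sort(indexed.begin(), indexed.begin() + threshold);

    std::vector<size_t> result(threshold);
    for (size_t i = 0; i < threshold; ++i)
        result[i] = indexed[i].second;
    return result;
}

int main()
{
    for (size_t idx : firstNIndices({5, 1, 4, 1, 3}, 3))
        std::cout << idx << ' ';   // 1 3 4  (the rows holding values 1, 1, 3)
    std::cout << '\n';
}
```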
@ -1,162 +0,0 @@
#pragma once

#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>


static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;

namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        writeVarInt(item, buf);
    }
    else
    {
        writeVarUInt(item, buf);
    }
}

static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
    writeBinary(item, buf);
}

template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        DB::Int64 val;
        readVarT(val, buf);
        item = val;
    }
    else
    {
        DB::UInt64 val;
        readVarT(val, buf);
        item = val;
    }
}

static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
    item = readStringBinaryInto(*arena, buf);
}

template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
    typedef typename Storage::value_type ValueType;
    AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }

    virtual ~AggregateFunctionGroupArraySortedDataBase() { }
    inline void narrowDown()
    {
        while (values.size() > threshold)
            values.erase(--values.end());
    }

    void merge(const AggregateFunctionGroupArraySortedDataBase & other)
    {
        values.merge(Storage(other.values));
        narrowDown();
    }

    void serialize(WriteBuffer & buf) const
    {
        writeOneItem(buf, UInt64(values.size()));
        for (auto value : values)
        {
            serializeItem(buf, value);
        }
    }

    virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
    virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;

    void deserialize(ReadBuffer & buf, Arena * arena)
    {
        values.clear();
        UInt64 length;
        readOneItem(buf, nullptr, length);

        while (length--)
        {
            values.insert(deserializeItem(buf, arena));
        }

        narrowDown();
    }

    UInt64 threshold;
    Storage values;
};

template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
    using Base::Base;

    void add(T item, TIndex weight)
    {
        Base::values.insert({weight, item});
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
    {
        writeOneItem(buf, value.first);
        writeOneItem(buf, value.second);
    }

    virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        TIndex first;
        T second;
        readOneItem(buf, arena, first);
        readOneItem(buf, arena, second);

        return {first, second};
    }

    static T itemValue(typename Base::ValueType & value) { return value.second; }
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
    using Base::Base;

    void add(T item)
    {
        Base::values.insert(item);
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }

    typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        T value;
        readOneItem(buf, arena, value);
        return value;
    }

    static T itemValue(typename Base::ValueType & value) { return value; }
};
}
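The removed state type is, at its core, a sorted container trimmed back to `threshold` elements after every mutation. A self-contained sketch of that `narrowDown` pattern with a plain `std::multiset` (names hypothetical):

```cpp
#include <iostream>
#include <set>

// Keeps only the `threshold` smallest values ever inserted, mirroring
// the narrowDown() trimming used by the aggregate function state above.
struct BoundedSortedSet
{
    size_t threshold;
    std::multiset<int> values;

    explicit BoundedSortedSet(size_t threshold_) : threshold(threshold_) {}

    void add(int item)
    {
        values.insert(item);
        narrowDown();
    }

    void narrowDown()
    {
        // Drop the largest elements until we are back under the limit.
        while (values.size() > threshold)
            values.erase(--values.end());
    }
};

int main()
{
    BoundedSortedSet set(3);
    for (int v : {7, 2, 9, 1, 5})
        set.add(v);
    for (int v : set.values)
        std::cout << v << ' ';   // 1 2 5
    std::cout << '\n';
}
```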
@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);

class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -131,7 +130,6 @@ void registerAggregateFunctions()
    registerAggregateFunctionIntervalLengthSum(factory);
    registerAggregateFunctionExponentialMovingAverage(factory);
    registerAggregateFunctionSparkbar(factory);
    registerAggregateFunctionGroupArraySorted(factory);

    registerWindowFunctions(factory);
}
@ -2,7 +2,6 @@

#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <map>
#include <unordered_map>
@ -392,7 +391,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
    processed_rows += block.rows();

    /// Even if all blocks are empty, we still need to initialize the output stream to write an empty resultset.
    initBlockOutputStream(block, parsed_query);
    initOutputFormat(block, parsed_query);

    /// The header block containing zero rows was used to initialize
    /// output_format, do not output it.
@ -439,14 +438,14 @@ void ClientBase::onLogData(Block & block)

void ClientBase::onTotals(Block & block, ASTPtr parsed_query)
{
    initBlockOutputStream(block, parsed_query);
    initOutputFormat(block, parsed_query);
    output_format->setTotals(block);
}


void ClientBase::onExtremes(Block & block, ASTPtr parsed_query)
{
    initBlockOutputStream(block, parsed_query);
    initOutputFormat(block, parsed_query);
    output_format->setExtremes(block);
}

@ -466,7 +465,7 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
}


void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
    if (!output_format)
@ -1489,7 +1488,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin

    if (is_interactive)
    {
        std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
        std::cout << std::endl
                  << processed_rows << " row" << (processed_rows == 1 ? "" : "s")
                  << " in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
        progress_indication.writeFinalProgress();
        std::cout << std::endl << std::endl;
    }
@ -2059,156 +2060,6 @@ void ClientBase::showClientVersion()
}


void ClientBase::readArguments(
    int argc,
    char ** argv,
    Arguments & common_arguments,
    std::vector<Arguments> & external_tables_arguments,
    std::vector<Arguments> & hosts_and_ports_arguments)
{
    /** We allow different groups of arguments:
      * - common arguments;
      * - arguments for any number of external tables each in the form "--external args...",
      *   where possible args are file, name, format, structure, types;
      * - param arguments for prepared statements.
      * Split these groups before processing.
      */

    bool in_external_group = false;

    std::string prev_host_arg;
    std::string prev_port_arg;

    for (int arg_num = 1; arg_num < argc; ++arg_num)
    {
        std::string_view arg = argv[arg_num];

        if (arg == "--external")
        {
            in_external_group = true;
            external_tables_arguments.emplace_back(Arguments{""});
        }
        /// Options with value after equal sign.
        else if (
            in_external_group
            && (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
                || arg.starts_with("--types=")))
        {
            external_tables_arguments.back().emplace_back(arg);
        }
        /// Options with value after whitespace.
        else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
        {
            if (arg_num + 1 < argc)
            {
                external_tables_arguments.back().emplace_back(arg);
                ++arg_num;
                arg = argv[arg_num];
                external_tables_arguments.back().emplace_back(arg);
            }
            else
                break;
        }
        else
        {
            in_external_group = false;

            /// Parameter argument after underscore.
            if (arg.starts_with("--param_"))
            {
                auto param_continuation = arg.substr(strlen("--param_"));
                auto equal_pos = param_continuation.find_first_of('=');

                if (equal_pos == std::string::npos)
                {
                    /// param_name value
                    ++arg_num;
                    if (arg_num >= argc)
                        throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
                    arg = argv[arg_num];
                    query_parameters.emplace(String(param_continuation), String(arg));
                }
                else
                {
                    if (equal_pos == 0)
                        throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);

                    /// param_name=value
                    query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
                }
            }
            else if (arg.starts_with("--host") || arg.starts_with("-h"))
            {
                std::string host_arg;
                /// --host host
                if (arg == "--host" || arg == "-h")
                {
                    ++arg_num;
                    if (arg_num >= argc)
                        throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
                    arg = argv[arg_num];
                    host_arg = "--host=";
                    host_arg.append(arg);
                }
                else
                    host_arg = arg;

                /// --port port1 --host host1
                if (!prev_port_arg.empty())
                {
                    hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
                    prev_port_arg.clear();
                }
                else
                {
                    /// --host host1 --host host2
                    if (!prev_host_arg.empty())
                        hosts_and_ports_arguments.push_back({prev_host_arg});

                    prev_host_arg = host_arg;
                }
            }
            else if (arg.starts_with("--port"))
            {
                auto port_arg = String{arg};
                /// --port port
                if (arg == "--port")
                {
                    port_arg.push_back('=');
                    ++arg_num;
                    if (arg_num >= argc)
                        throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
                    arg = argv[arg_num];
                    port_arg.append(arg);
                }

                /// --host host1 --port port1
                if (!prev_host_arg.empty())
                {
                    hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
                    prev_host_arg.clear();
                }
                else
                {
                    /// --port port1 --port port2
                    if (!prev_port_arg.empty())
                        hosts_and_ports_arguments.push_back({prev_port_arg});

                    prev_port_arg = port_arg;
                }
            }
            else if (arg == "--allow_repeated_settings")
                allow_repeated_settings = true;
            else
                common_arguments.emplace_back(arg);
        }
    }
    if (!prev_host_arg.empty())
        hosts_and_ports_arguments.push_back({prev_host_arg});
    if (!prev_port_arg.empty())
        hosts_and_ports_arguments.push_back({prev_port_arg});
}

void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
    if (allow_repeated_settings)
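The host/port pairing in the removed readArguments is subtle: a `--port` attaches to the most recent unpaired `--host` (and vice versa), while repeated flags of the same kind flush the pending one as a single-element group. A standalone C++20 sketch of just that pairing rule (hypothetical helper, no error handling):

```cpp
#include <iostream>
#include <string>
#include <vector>

using Group = std::vector<std::string>;

// Pairs alternating --host=/--port= arguments the same way readArguments does:
// a port joins the pending host (or a host joins the pending port); two hosts
// or two ports in a row flush the first as its own group.
std::vector<Group> pairHostsAndPorts(const std::vector<std::string> & args)
{
    std::vector<Group> groups;
    std::string prev_host, prev_port;

    for (const auto & arg : args)
    {
        if (arg.starts_with("--host="))
        {
            if (!prev_port.empty())
            {
                groups.push_back({arg, prev_port});
                prev_port.clear();
            }
            else
            {
                if (!prev_host.empty())
                    groups.push_back({prev_host});
                prev_host = arg;
            }
        }
        else if (arg.starts_with("--port="))
        {
            if (!prev_host.empty())
            {
                groups.push_back({arg, prev_host});
                prev_host.clear();
            }
            else
            {
                if (!prev_port.empty())
                    groups.push_back({prev_port});
                prev_port = arg;
            }
        }
    }
    if (!prev_host.empty())
        groups.push_back({prev_host});
    if (!prev_port.empty())
        groups.push_back({prev_port});
    return groups;
}

int main()
{
    for (const auto & g : pairHostsAndPorts({"--host=a", "--port=9000", "--host=b", "--host=c"}))
    {
        for (const auto & s : g)
            std::cout << s << ' ';
        std::cout << '\n';
    }
    // --port=9000 --host=a
    // --host=b
    // --host=c
}
```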
@ -106,6 +106,14 @@ protected:

    bool processQueryText(const String & text);

    virtual void readArguments(
        int argc,
        char ** argv,
        Arguments & common_arguments,
        std::vector<Arguments> & external_tables_arguments,
        std::vector<Arguments> & hosts_and_ports_arguments) = 0;


private:
    void receiveResult(ASTPtr parsed_query);
    bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -131,19 +139,13 @@ private:
    void sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query);
    void sendExternalTables(ASTPtr parsed_query);

    void initBlockOutputStream(const Block & block, ASTPtr parsed_query);
    void initOutputFormat(const Block & block, ASTPtr parsed_query);
    void initLogsOutputStream();

    String prompt() const;

    void resetOutput();
    void outputQueryInfo(bool echo_query_);
    void readArguments(
        int argc,
        char ** argv,
        Arguments & common_arguments,
        std::vector<Arguments> & external_tables_arguments,
        std::vector<Arguments> & hosts_and_ports_arguments);
    void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);

    void updateSuggest(const ASTPtr & ast);
@ -90,7 +90,7 @@ void LRUFileCache::initialize()
}

void LRUFileCache::useCell(
    const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & /* cache_lock */)
    const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock)
{
    auto file_segment = cell.file_segment;

@ -109,7 +109,7 @@ void LRUFileCache::useCell(
    if (cell.queue_iterator)
    {
        /// Move to the end of the queue. The iterator remains valid.
        queue.splice(queue.end(), queue, *cell.queue_iterator);
        queue.moveToEnd(*cell.queue_iterator, cache_lock);
    }
}

@ -237,7 +237,11 @@ FileSegments LRUFileCache::splitRangeIntoCells(
}

void LRUFileCache::fillHolesWithEmptyFileSegments(
    FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
    FileSegments & file_segments,
    const Key & key,
    const FileSegment::Range & range,
    bool fill_with_detached_file_segments,
    std::lock_guard<std::mutex> & cache_lock)
{
    /// There are segments [segment1, ..., segmentN]
    /// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
@ -319,7 +323,8 @@ void LRUFileCache::fillHolesWithEmptyFileSegments(
        }
        else
        {
            file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
            file_segments.splice(
                file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
        }
    }
}
@ -397,10 +402,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
        throw Exception(
            ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
            "Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
            keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
            keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock));

    auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
    FileSegmentCell cell(std::move(file_segment), queue);
    FileSegmentCell cell(std::move(file_segment), this, cache_lock);

    auto & offsets = files[key];

@ -425,6 +430,10 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
{
    std::lock_guard cache_lock(mutex);

#ifndef NDEBUG
    assertCacheCorrectness(key, cache_lock);
#endif

    auto * cell = getCell(key, offset, cache_lock);
    if (cell)
        throw Exception(
@ -437,15 +446,15 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
}

bool LRUFileCache::tryReserve(
    const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
    const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
    auto removed_size = 0;
    size_t queue_size = queue.size();
    size_t queue_size = queue.getElementsNum(cache_lock);
    assert(queue_size <= max_element_size);

    /// Since space reservation is incremental, the cache cell already exists if its state is EMPTY.
    /// And the cache cell does not exist on startup -- as we first check for space and then add a cell.
    auto * cell_for_reserve = getCell(key_, offset_, cache_lock);
    auto * cell_for_reserve = getCell(key, offset, cache_lock);

    /// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
    /// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
@ -455,24 +464,27 @@ bool LRUFileCache::tryReserve(
    auto is_overflow = [&]
    {
        /// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
        return (max_size != 0 && current_size + size - removed_size > max_size)
        return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
            || (max_element_size != 0 && queue_size > max_element_size);
    };

    std::vector<FileSegmentCell *> to_evict;
    std::vector<FileSegmentCell *> trash;

    auto key_it = queue.begin();
    while (is_overflow() && key_it != queue.end())
    for (const auto & [entry_key, entry_offset, entry_size] : queue)
    {
        const auto [key, offset] = *key_it;
        ++key_it;
        if (!is_overflow())
            break;

        auto * cell = getCell(key, offset, cache_lock);
        auto * cell = getCell(entry_key, entry_offset, cache_lock);
        if (!cell)
            throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
                "Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
            throw Exception(
                ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
                "Cache became inconsistent. Key: {}, offset: {}",
                keyToStr(key), offset);

        size_t cell_size = cell->size();
        assert(entry_size == cell_size);

        /// It is guaranteed that cell is not removed from cache as long as
        /// a pointer to the corresponding file segment is held by any other thread.
@ -495,7 +507,7 @@ bool LRUFileCache::tryReserve(
            }
            default:
            {
                remove(key, offset, cache_lock, segment_lock);
                trash.push_back(cell);
                break;
            }
        }
@ -505,11 +517,35 @@ bool LRUFileCache::tryReserve(
        }
    }

    /// This case is very unlikely, can happen in case of exception from
    /// file_segment->complete(), which would be a logical error.
    assert(trash.empty());
    for (auto & cell : trash)
    {
        auto file_segment = cell->file_segment;
        if (file_segment)
        {
            std::lock_guard segment_lock(file_segment->mutex);
            remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
        }
    }

    if (is_overflow())
        return false;

    if (cell_for_reserve && !cell_for_reserve->queue_iterator)
        cell_for_reserve->queue_iterator = queue.insert(queue.end(), std::make_pair(key_, offset_));
    /// The cache cell is nullptr on server startup because we first check for space and then add a cell.
    if (cell_for_reserve)
    {
        /// queue_iterator is std::nullopt here if no space has been reserved yet, a cache cell
        /// acquires a queue iterator on first successful space reservation attempt.
        /// If the queue iterator already exists, we need to update the size after each space reservation.
        auto queue_iterator = cell_for_reserve->queue_iterator;
        if (queue_iterator)
            queue.incrementSize(*queue_iterator, size, cache_lock);
        else
            cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
    }

    for (auto & cell : to_evict)
    {
@ -521,8 +557,7 @@ bool LRUFileCache::tryReserve(
        }
    }

    current_size += size - removed_size;
    if (current_size > (1ull << 63))
    if (queue.getTotalWeight(cache_lock) > (1ull << 63))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");

    return true;
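The reservation loop above walks the LRU queue from its cold end and evicts entries until the byte budget fits. A minimal standalone sketch of that weight-budget eviction, stripped of locking and releasability checks (hypothetical names):

```cpp
#include <iostream>
#include <list>
#include <utility>

// Entry: (id, size in bytes). The front of the list is the least recently used.
using Queue = std::list<std::pair<int, size_t>>;

// Evicts from the cold end until `incoming` more bytes fit under `max_size`.
// Returns false if the budget still cannot be met (e.g. incoming > max_size).
bool tryReserve(Queue & queue, size_t & current_size, size_t max_size, size_t incoming)
{
    auto is_overflow = [&] { return current_size + incoming > max_size; };

    for (auto it = queue.begin(); it != queue.end() && is_overflow();)
    {
        current_size -= it->second;   // evict the least recently used entry
        it = queue.erase(it);
    }

    if (is_overflow())
        return false;

    queue.emplace_back(/* id = */ -1, incoming);  // the new entry becomes most recently used
    current_size += incoming;
    return true;
}

int main()
{
    Queue queue = {{1, 40}, {2, 30}, {3, 20}};  // LRU order: entry 1 is coldest
    size_t current_size = 90;

    std::cout << tryReserve(queue, current_size, /* max_size = */ 100, /* incoming = */ 50) << '\n'; // 1
    std::cout << queue.size() << ' ' << current_size << '\n';  // 3 100  (entry 1 evicted, new one added)
}
```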
@ -549,7 +584,10 @@ void LRUFileCache::remove(const Key & key)
    for (auto & cell : to_remove)
    {
        if (!cell->releasable())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove file from cache because someone reads from it. File segment info: {}", cell->file_segment->getInfoForLog());
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Cannot remove file from cache because someone reads from it. File segment info: {}",
                cell->file_segment->getInfoForLog());

        auto file_segment = cell->file_segment;
        if (file_segment)
@ -565,6 +603,10 @@ void LRUFileCache::remove(const Key & key)

    if (fs::exists(key_path))
        fs::remove(key_path);

#ifndef NDEBUG
    assertCacheCorrectness(cache_lock);
#endif
}

void LRUFileCache::remove(bool force_remove_unreleasable)
@ -574,20 +616,22 @@ void LRUFileCache::remove(bool force_remove_unreleasable)

    std::lock_guard cache_lock(mutex);

    std::vector<FileSegment *> to_remove;
    for (auto it = queue.begin(); it != queue.end();)
    {
        auto & [key, offset] = *it++;

        const auto & [key, offset, size] = *it++;
        auto * cell = getCell(key, offset, cache_lock);
        if (!cell)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell");
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Cache is in inconsistent state: LRU queue contains entries with no cache cell");

        if (cell->releasable() || force_remove_unreleasable)
        {
            auto file_segment = cell->file_segment;
            if (file_segment)
            {
                std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
                std::lock_guard segment_lock(file_segment->mutex);
                file_segment->detach(cache_lock, segment_lock);
                remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
            }
@ -606,7 +650,9 @@ void LRUFileCache::remove(
        throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);

    if (cell->queue_iterator)
        queue.erase(*cell->queue_iterator);
    {
        queue.remove(*cell->queue_iterator, cache_lock);
    }

    auto & offsets = files[key];
    offsets.erase(offset);
@ -642,7 +688,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
    Key key;
    UInt64 offset = 0;
    size_t size = 0;
    std::vector<std::pair<LRUQueueIterator, std::weak_ptr<FileSegment>>> queue_entries;
    std::vector<std::pair<LRUQueue::Iterator, std::weak_ptr<FileSegment>>> queue_entries;

    /// cache_base_path / key_prefix / key / offset

@ -681,7 +727,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
        {
            LOG_WARNING(log,
                "Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
                max_size, availableSize(), key_it->path().string(), size);
                max_size, getAvailableCacheSizeUnlocked(cache_lock), key_it->path().string(), size);
            fs::remove(offset_it->path());
        }
    }
@ -699,47 +745,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
        if (file_segment.expired())
            continue;

        queue.splice(queue.end(), queue, it);
        queue.moveToEnd(it, cache_lock);
    }
}

LRUFileCache::Stat LRUFileCache::getStat()
{
    std::lock_guard cache_lock(mutex);

    Stat stat
    {
        .size = queue.size(),
        .available = availableSize(),
        .downloaded_size = 0,
        .downloading_size = 0,
    };

    for (const auto & [key, offset] : queue)
    {
        const auto * cell = getCell(key, offset, cache_lock);
        if (!cell)
            throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
                "Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);

        switch (cell->file_segment->download_state)
        {
            case FileSegment::State::DOWNLOADED:
            {
                ++stat.downloaded_size;
                break;
            }
            case FileSegment::State::DOWNLOADING:
            {
                ++stat.downloading_size;
                break;
            }
            default:
                break;
        }
    }

    return stat;
#ifndef NDEBUG
    assertCacheCorrectness(cache_lock);
#endif
}

void LRUFileCache::reduceSizeToDownloaded(
@ -754,14 +764,23 @@ void LRUFileCache::reduceSizeToDownloaded(
    auto * cell = getCell(key, offset, cache_lock);

    if (!cell)
        throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cell found for key: {}, offset: {}", keyToStr(key), offset);
    {
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "No cell found for key: {}, offset: {}",
            keyToStr(key), offset);
    }

    const auto & file_segment = cell->file_segment;

    size_t downloaded_size = file_segment->downloaded_size;
    if (downloaded_size == file_segment->range().size())
        throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
            "Nothing to reduce, file segment fully downloaded, key: {}, offset: {}", keyToStr(key), offset);
    {
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Nothing to reduce, file segment fully downloaded, key: {}, offset: {}",
            keyToStr(key), offset);
    }

    cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
}
@ -814,16 +833,40 @@ std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
size_t LRUFileCache::getUsedCacheSize() const
{
    std::lock_guard cache_lock(mutex);
    return current_size;
    return getUsedCacheSizeUnlocked(cache_lock);
}

size_t LRUFileCache::getCacheFilesNum() const
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
    return queue.getTotalWeight(cache_lock);
}

size_t LRUFileCache::getAvailableCacheSize() const
{
    std::lock_guard cache_lock(mutex);
    return files.size();
    return getAvailableCacheSizeUnlocked(cache_lock);
}

LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
size_t LRUFileCache::getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
    return max_size - getUsedCacheSizeUnlocked(cache_lock);
}

size_t LRUFileCache::getFileSegmentsNum() const
{
    std::lock_guard cache_lock(mutex);
    return getFileSegmentsNumUnlocked(cache_lock);
}

size_t LRUFileCache::getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
    return queue.getElementsNum(cache_lock);
}

LRUFileCache::FileSegmentCell::FileSegmentCell(
    FileSegmentPtr file_segment_,
    LRUFileCache * cache,
    std::lock_guard<std::mutex> & cache_lock)
    : file_segment(file_segment_)
{
    /**
@ -836,7 +879,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
    {
        case FileSegment::State::DOWNLOADED:
|
||||
{
|
||||
queue_iterator = queue_.insert(queue_.end(), getKeyAndOffset());
|
||||
queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock);
|
||||
break;
|
||||
}
|
||||
case FileSegment::State::EMPTY:
|
||||
@ -851,13 +894,97 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
|
||||
}
|
||||
}
|
||||
|
||||
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
|
||||
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
for (const auto & [entry_key, entry_offset, _] : queue)
|
||||
{
|
||||
if (entry_key == key && entry_offset == offset)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
|
||||
keyToStr(key), offset, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
cache_size += size;
|
||||
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
|
||||
}
|
||||
|
||||
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
{
|
||||
cache_size -= queue_it->size;
|
||||
queue.erase(queue_it);
|
||||
}
|
||||
|
||||
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
{
|
||||
queue.splice(queue.end(), queue, queue_it);
|
||||
}
|
||||
|
||||
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
{
|
||||
cache_size += size_increment;
|
||||
queue_it->size += size_increment;
|
||||
}
|
||||
|
||||
bool LRUFileCache::LRUQueue::contains(
|
||||
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
|
||||
{
|
||||
/// This method is used for assertions in debug mode.
|
||||
/// So we do not care about complexity here.
|
||||
for (const auto & [entry_key, entry_offset, size] : queue)
|
||||
{
|
||||
if (key == entry_key && offset == entry_offset)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
[[maybe_unused]] size_t total_size = 0;
|
||||
for (auto it = queue.begin(); it != queue.end();)
|
||||
{
|
||||
auto & [key, offset, size] = *it++;
|
||||
|
||||
auto * cell = cache->getCell(key, offset, cache_lock);
|
||||
if (!cell)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
|
||||
}
|
||||
|
||||
assert(cell->size() == size);
|
||||
total_size += size;
|
||||
}
|
||||
|
||||
assert(total_size == cache_size);
|
||||
assert(cache_size <= cache->max_size);
|
||||
assert(queue.size() <= cache->max_element_size);
|
||||
}
|
||||
|
||||
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
|
||||
{
|
||||
String result;
|
||||
for (const auto & [key, offset, size] : queue)
|
||||
{
|
||||
if (!result.empty())
|
||||
result += ", ";
|
||||
result += fmt::format("{}: [{}, {}]", keyToStr(key), offset, offset + size - 1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
String LRUFileCache::dumpStructure(const Key & key)
|
||||
{
|
||||
std::lock_guard cache_lock(mutex);
|
||||
return dumpStructureImpl(key, cache_lock);
|
||||
return dumpStructureUnlocked(key, cache_lock);
|
||||
}
|
||||
|
||||
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
String LRUFileCache::dumpStructureUnlocked(const Key & key, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
WriteBufferFromOwnString result;
|
||||
const auto & cells_by_offset = files[key];
|
||||
@ -865,18 +992,37 @@ String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mut
|
||||
for (const auto & [offset, cell] : cells_by_offset)
|
||||
result << cell.file_segment->getInfoForLog() << "\n";
|
||||
|
||||
result << "\n\nQueue: " << queue.toString(cache_lock);
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
void LRUFileCache::assertCacheCellsCorrectness(
|
||||
const FileSegmentsByOffset & cells_by_offset, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
const auto & cells_by_offset = files[key];
|
||||
|
||||
for (const auto & [_, cell] : cells_by_offset)
|
||||
{
|
||||
const auto & file_segment = cell.file_segment;
|
||||
file_segment->assertCorrectness();
|
||||
|
||||
if (file_segment->reserved_size != 0)
|
||||
{
|
||||
assert(cell.queue_iterator);
|
||||
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
assertCacheCellsCorrectness(files[key], cache_lock);
|
||||
queue.assertCorrectness(this, cache_lock);
|
||||
}
|
||||
|
||||
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
for (const auto & [key, cells_by_offset] : files)
|
||||
assertCacheCellsCorrectness(files[key], cache_lock);
|
||||
queue.assertCorrectness(this, cache_lock);
|
||||
}
|
||||
|
||||
}
|
||||
|
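A pattern worth noting across the hunks above: every `*Unlocked` helper takes a `std::lock_guard<std::mutex> &` parameter that is never read. The guard acts as a compile-time witness that the caller already holds the cache mutex, so a helper cannot accidentally be called from an unlocked context. A minimal sketch of the idiom, with illustrative names that are not from the patch:

#include <mutex>
#include <cstddef>

class Cache
{
public:
    size_t size() const
    {
        std::lock_guard cache_lock(mutex);
        return sizeUnlocked(cache_lock);
    }

private:
    /// Accepting the guard by reference documents (and enforces at every call
    /// site) that the caller already holds the mutex; the parameter is unused.
    size_t sizeUnlocked(std::lock_guard<std::mutex> & /* cache_lock */) const
    {
        return current_size;
    }

    mutable std::mutex mutex;
    size_t current_size = 0;
};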
@ -92,7 +92,7 @@ public:

    virtual size_t getUsedCacheSize() const = 0;

    virtual size_t getCacheFilesNum() const = 0;
    virtual size_t getFileSegmentsNum() const = 0;

protected:
    String cache_base_path;
@ -155,19 +155,57 @@ public:

    size_t getUsedCacheSize() const override;

    size_t getCacheFilesNum() const override;
    size_t getFileSegmentsNum() const override;

private:
    using FileKeyAndOffset = std::pair<Key, size_t>;
    using LRUQueue = std::list<FileKeyAndOffset>;
    using LRUQueueIterator = typename LRUQueue::iterator;
    class LRUQueue
    {
    public:
        struct FileKeyAndOffset
        {
            Key key;
            size_t offset;
            size_t size;

            FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {}
        };

        using Iterator = typename std::list<FileKeyAndOffset>::iterator;

        size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }

        size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }

        Iterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);

        void remove(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);

        void moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);

        /// Space reservation for a file segment is incremental, so we need to be able to increment the size of the queue entry.
        void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);

        void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);

        String toString(std::lock_guard<std::mutex> & cache_lock) const;

        bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;

        Iterator begin() { return queue.begin(); }

        Iterator end() { return queue.end(); }

    private:
        std::list<FileKeyAndOffset> queue;
        size_t cache_size = 0;
    };

    struct FileSegmentCell : private boost::noncopyable
    {
        FileSegmentPtr file_segment;

        /// Iterator is put here on first reservation attempt, if successful.
        std::optional<LRUQueueIterator> queue_iterator;
        std::optional<LRUQueue::Iterator> queue_iterator;

        /// The pointer to the file segment is always held by the cache itself.
        /// Apart from the pointer in the cache, it can be held by cache users, when they call
@ -176,13 +214,11 @@ private:

        size_t size() const { return file_segment->reserved_size; }

        FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_);
        FileSegmentCell(FileSegmentPtr file_segment_, LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);

        FileSegmentCell(FileSegmentCell && other) noexcept
            : file_segment(std::move(other.file_segment))
            , queue_iterator(other.queue_iterator) {}

        std::pair<Key, size_t> getKeyAndOffset() const { return std::make_pair(file_segment->key(), file_segment->range().left); }
    };

    using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
@ -190,7 +226,6 @@ private:

    CachedFiles files;
    LRUQueue queue;
    size_t current_size = 0;
    Poco::Logger * log;

    FileSegments getImpl(
@ -225,31 +260,32 @@ private:
        std::lock_guard<std::mutex> & cache_lock,
        std::lock_guard<std::mutex> & segment_lock) override;

    size_t availableSize() const { return max_size - current_size; }
    size_t getAvailableCacheSize() const;

    void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);

    FileSegments splitRangeIntoCells(
        const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);

    String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
    String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);

    void fillHolesWithEmptyFileSegments(
        FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);

    size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;

    size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;

    size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;

    void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);

public:
    struct Stat
    {
        size_t size;
        size_t available;
        size_t downloaded_size;
        size_t downloading_size;
    };

    Stat getStat();

    String dumpStructure(const Key & key_) override;

    void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);

    void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
};

}
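The new LRUQueue declared here stores a per-entry size next to each (key, offset) and maintains the running total inside add()/remove()/incrementSize(), so getTotalWeight() is O(1) rather than a walk over the list. The same bookkeeping in isolation, as a simplified sketch with hypothetical types:

#include <list>
#include <cstddef>

struct Entry { int key; size_t size; };

class WeightedLRU
{
public:
    using Iterator = std::list<Entry>::iterator;

    Iterator add(int key, size_t size)
    {
        total_weight += size;                        // O(1) weight bookkeeping
        return queue.insert(queue.end(), Entry{key, size});
    }

    void remove(Iterator it)
    {
        total_weight -= it->size;
        queue.erase(it);
    }

    /// Reservation is incremental, so an entry's weight can grow in place.
    void incrementSize(Iterator it, size_t delta)
    {
        total_weight += delta;
        it->size += delta;
    }

    void touch(Iterator it) { queue.splice(queue.end(), queue, it); }

    size_t getTotalWeight() const { return total_weight; }

private:
    std::list<Entry> queue;   // front = least recently used
    size_t total_weight = 0;
};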
@ -94,11 +94,6 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
}

String FileSegment::getCallerId()
{
    return getCallerIdImpl();
}

String FileSegment::getCallerIdImpl()
{
    if (!CurrentThread::isInitialized()
        || !CurrentThread::get().getQueryContext()
@ -400,7 +395,10 @@ bool FileSegment::reserve(size_t size)
    bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);

    if (reserved)
    {
        std::lock_guard segment_lock(mutex);
        reserved_size += size;
    }

    return reserved;
}
@ -606,6 +604,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
    info << "File segment: " << range().toString() << ", ";
    info << "state: " << download_state << ", ";
    info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
    info << "reserved size: " << reserved_size << ", ";
    info << "downloader id: " << downloader_id << ", ";
    info << "caller id: " << getCallerId();


@ -184,8 +184,6 @@ private:
        std::lock_guard<std::mutex> & cache_lock,
        std::lock_guard<std::mutex> & segment_lock);

    static String getCallerIdImpl();

    void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);

    const Range segment_range;
@ -14,7 +14,7 @@
 * make a persistent copy of the key in each of the following cases:
 * 1) the aggregation method doesn't use temporary keys, so they're persistent
 *    from the start;
 * 1) the key is already present in the hash table;
 * 2) the key is already present in the hash table;
 * 3) that particular key is stored by value, e.g. a short StringRef key in
 *    StringHashMap.
 *
@ -83,7 +83,7 @@ public:
        current_word = 0;
    }

    void update(const char * data, UInt64 size)
    ALWAYS_INLINE void update(const char * data, UInt64 size)
    {
        const char * end = data + size;

@ -137,12 +137,12 @@ public:
    }

    template <typename T>
    void update(const T & x)
    ALWAYS_INLINE void update(const T & x)
    {
        update(reinterpret_cast<const char *>(&x), sizeof(x)); /// NOLINT
    }

    void update(const std::string & x)
    ALWAYS_INLINE void update(const std::string & x)
    {
        update(x.data(), x.length());
    }

@ -181,7 +181,7 @@ ThreadStatus::~ThreadStatus()
        deleter();

    /// Only change current_thread if it's currently being used by this ThreadStatus
    /// For example, PushingToViewsBlockOutputStream creates and deletes ThreadStatus instances while running in the main query thread
    /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread
    if (current_thread == this)
        current_thread = nullptr;
}
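The SipHash::update overloads above gain ALWAYS_INLINE because they sit on per-row hashing paths. ALWAYS_INLINE is, as far as I know, the usual macro around the compiler's always-inline attribute (in ClickHouse it is defined in base/defines.h; the exact definition may differ between versions), so a rough, hedged equivalent of what the annotation buys is:

// Sketch of the macro the codebase presumably uses (assumption, not verbatim).
#if defined(__GNUC__) || defined(__clang__)
    #define ALWAYS_INLINE __attribute__((__always_inline__))
#else
    #define ALWAYS_INLINE
#endif

struct Hasher
{
    unsigned long long state = 0;

    /// Forced inlining removes call overhead when update() is invoked per row.
    ALWAYS_INLINE void update(unsigned long long x) { state = state * 1099511628211ULL ^ x; }
};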
@ -135,6 +135,8 @@ TEST(LRUFileCache, get)
        /// Current cache:    [__________]
        ///                   ^          ^
        ///                   0          9
        ASSERT_EQ(cache.getFileSegmentsNum(), 1);
        ASSERT_EQ(cache.getUsedCacheSize(), 10);

        {
            /// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
@ -154,6 +156,8 @@ TEST(LRUFileCache, get)
        /// Current cache:    [__________][_____]
        ///                   ^          ^^     ^
        ///                   0          910    14
        ASSERT_EQ(cache.getFileSegmentsNum(), 2);
        ASSERT_EQ(cache.getUsedCacheSize(), 15);

        {
            auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
@ -179,12 +183,15 @@ TEST(LRUFileCache, get)

        complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
        complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
        complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
        // complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]

        /// Current cache:    [__________][_____]   [____]    [___][]
        ///                   ^          ^^     ^   ^    ^    ^   ^^^
        ///                   0          910    14  17   20   24  2627
        ///
        ASSERT_EQ(cache.getFileSegmentsNum(), 4);
        ASSERT_EQ(cache.getUsedCacheSize(), 22);

        {
            auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
@ -249,7 +256,7 @@ TEST(LRUFileCache, get)
        ///                   ^   ^   ^   ^   ^
        ///                   10  17  21  24  26

        ASSERT_EQ(cache.getStat().size, 5);
        ASSERT_EQ(cache.getFileSegmentsNum(), 5);

        {
            auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
@ -479,8 +486,6 @@ TEST(LRUFileCache, get)
        auto cache2 = DB::LRUFileCache(cache_base_path, settings);
        cache2.initialize();

        ASSERT_EQ(cache2.getStat().downloaded_size, 5);

        auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
        auto segments1 = fromHolder(holder1);
        ASSERT_EQ(segments1.size(), 5);
@ -46,7 +46,7 @@ struct BlockInfo
    void read(ReadBuffer & in);
};

/// Block extension to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
/// Block extension to support delayed defaults. AddingDefaultsTransform uses it to replace missing values with column defaults.
class BlockMissingValues
{
public:
@ -22,6 +22,10 @@ namespace DB
{
class IColumn;

static constexpr UInt64 operator""_Gb(unsigned long long value)
{
    return value * 1024 * 1024 * 1024;
}

/** List of settings: type, name, default value, description, flags
  *
@ -340,7 +344,7 @@ class IColumn;
    M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
    M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
    M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
    M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
    M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
    M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
    M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
    M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
@ -356,9 +360,9 @@ class IColumn;
    M(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
    \
    M(UInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.", 0) \
    M(UInt64, max_guaranteed_memory_usage, 0, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
    M(UInt64, max_guaranteed_memory_usage, 10_Gb, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
    M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \
    M(UInt64, max_guaranteed_memory_usage_for_user, 0, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
    M(UInt64, max_guaranteed_memory_usage_for_user, 10_Gb, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
    M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
    M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
    M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
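The `_Gb` literal added above lets byte-count defaults such as `10_Gb` read naturally inside the settings macro table. A self-contained illustration of the same literal (the static_asserts are mine, added only to check the arithmetic):

#include <cstdint>

// Same shape as the literal in the patch; unsigned long long is the
// parameter type required for integer literal operators.
static constexpr uint64_t operator""_Gb(unsigned long long value)
{
    return value * 1024 * 1024 * 1024;   // GiB expressed in bytes
}

static_assert(1_Gb == 1'073'741'824ULL, "1 GiB in bytes");
static_assert(10_Gb == 10'737'418'240ULL, "10 GiB in bytes");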
@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
    {{"auto", JoinAlgorithm::AUTO},
     {"hash", JoinAlgorithm::HASH},
     {"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
     {"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}})
     {"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
     {"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})


IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,

@ -42,6 +42,7 @@ enum class JoinAlgorithm
    HASH,
    PARTIAL_MERGE,
    PREFER_PARTIAL_MERGE,
    PARALLEL_HASH,
};

DECLARE_SETTING_ENUM(JoinAlgorithm)
@ -26,7 +26,7 @@ namespace ErrorCodes
{
    extern const int TYPE_MISMATCH;
    extern const int LOGICAL_ERROR;
    extern const int DUPLICATE_COLUMN;
    extern const int INCOMPATIBLE_COLUMNS;
}

size_t getNumberOfDimensions(const IDataType & type)
@ -183,6 +183,20 @@ static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts &
    return true;
}

/// Returns true if there exists a prefix with matched names,
/// but not matched structure (is Nested, number of dimensions).
static bool hasDifferentStructureInPrefix(const PathInData::Parts & lhs, const PathInData::Parts & rhs)
{
    for (size_t i = 0; i < std::min(lhs.size(), rhs.size()); ++i)
    {
        if (lhs[i].key != rhs[i].key)
            return false;
        else if (lhs[i] != rhs[i])
            return true;
    }
    return false;
}

void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
{
    size_t size = paths.size();
@ -192,9 +206,15 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
    {
        if (isPrefix(paths[i].getParts(), paths[j].getParts())
            || isPrefix(paths[j].getParts(), paths[i].getParts()))
            throw Exception(ErrorCodes::DUPLICATE_COLUMN,
            throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
                "Data in Object has ambiguous paths: '{}' and '{}'",
                paths[i].getPath(), paths[j].getPath());

        if (hasDifferentStructureInPrefix(paths[i].getParts(), paths[j].getParts()))
            throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
                "Data in Object has ambiguous paths: '{}' and '{}'. "
                "Paths have prefixes matched by names, but different in structure",
                paths[i].getPath(), paths[j].getPath());
        }
    }
}
@ -213,11 +213,14 @@ bool JSONDataParser<ParserImpl>::tryInsertDefaultFromNested(
{
    /// If there is a collected size of current Nested
    /// then insert array of this size as a default value.

    if (path.empty())
    if (path.empty() || array.empty())
        return false;

    /// Last element is not Null, because otherwise this path wouldn't exist.
    auto nested_key = getNameOfNested(path, array.back());
    if (nested_key.empty())
        return false;

    StringRef nested_key{path[0].key};
    auto * mapped = ctx.nested_sizes_by_key.find(nested_key);
    if (!mapped)
        return false;
@ -253,7 +256,18 @@ StringRef JSONDataParser<ParserImpl>::getNameOfNested(const PathInData::Parts &
    if (value.getType() != Field::Types::Array || path.empty())
        return {};

    return StringRef{path[0].key};
    /// Find the first key that is marked as nested,
    /// because we may have a tuple of Nested and there could be
    /// several arrays with the same prefix, but with independent sizes.
    /// Consider an array element with type `k2 Tuple(k3 Nested(...), k5 Nested(...))`.
    /// Then subcolumns `k2.k3` and `k2.k5` may have independent sizes and we should extract
    /// the keys `k3` and `k5` instead of `k2`.

    for (const auto & part : path)
        if (part.is_nested)
            return StringRef{part.key};

    return {};
}

#if USE_SIMDJSON
@ -60,7 +60,7 @@ public:
  * 1. \N
  * 2. empty string (without quotes)
  * 3. NULL
  * We support all of them (however, second variant is supported by CSVRowInputStream, not by deserializeTextCSV).
  * We support all of them (however, second variant is supported by CSVRowInputFormat, not by deserializeTextCSV).
  * (see also input_format_defaults_for_omitted_fields and input_format_csv_unquoted_null_literal_as_null settings)
  * In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
  */
@ -104,15 +104,17 @@ bool DatabaseReplicatedDDLWorker::waitForReplicaToProcessAllEntries(UInt64 timeo
        auto max_log = DDLTask::getLogEntryName(max_log_ptr);
        LOG_TRACE(log, "Waiting for worker thread to process all entries before {}, current task is {}", max_log, current_task);

    std::unique_lock lock{mutex};
    bool processed = wait_current_task_change.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&]()
    {
        assert(zookeeper->expired() || current_task <= max_log);
        return zookeeper->expired() || current_task == max_log || stop_flag;
    });
        std::unique_lock lock{mutex};
        bool processed = wait_current_task_change.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&]()
        {
            assert(zookeeper->expired() || current_task <= max_log);
            return zookeeper->expired() || current_task == max_log || stop_flag;
        });

    if (!processed)
        return false;
        if (!processed)
            return false;
    }

    LOG_TRACE(log, "Waiting for worker thread to process all entries before {}, current task is {}", max_log, current_task);
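The re-indented block above is the standard timed-wait idiom: take a `std::unique_lock`, then call `wait_for` with a predicate, so spurious wakeups are re-checked automatically and the boolean result directly says whether the condition held before the timeout. Distilled into a standalone sketch with illustrative names:

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

struct Worker
{
    std::mutex mutex;
    std::condition_variable task_changed;
    int current_task = 0;
    bool stop_flag = false;

    /// Returns false if the target was not reached within timeout_ms.
    bool waitForTask(int target, uint64_t timeout_ms)
    {
        std::unique_lock lock(mutex);
        // The predicate is re-evaluated after every wakeup, so spurious
        // wakeups and missed notifications are both handled correctly.
        return task_changed.wait_for(lock, std::chrono::milliseconds(timeout_ms),
            [&] { return current_task >= target || stop_flag; });
    }
};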
@ -217,7 +217,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
    }
    else
    {
        /// We pass empty block to RemoteBlockInputStream, because we don't know the structure of the result.
        /// We pass empty block to RemoteQueryExecutor, because we don't know the structure of the result.
        Block invalidate_sample_block;
        QueryPipeline pipeline(std::make_shared<RemoteSource>(
            std::make_shared<RemoteQueryExecutor>(pool, request, invalidate_sample_block, context_copy), false, false));
@ -67,7 +67,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
    size_t dictionary_keys_size = dict_struct.getKeysNames().size();
    block_key_columns.reserve(dictionary_keys_size);

    QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
    QueryPipeline pipeline(getSourcePipe(key_columns, requested_keys));

    PullingPipelineExecutor executor(pipeline);

@ -185,7 +185,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
    size_t dictionary_keys_size = dict_struct.getKeysNames().size();
    block_key_columns.reserve(dictionary_keys_size);

    QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
    QueryPipeline pipeline(getSourcePipe(key_columns, requested_keys));
    PullingPipelineExecutor executor(pipeline);

    size_t keys_found = 0;
@ -259,7 +259,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::isInHierarchy(
}

template <DictionaryKeyType dictionary_key_type>
Pipe DirectDictionary<dictionary_key_type>::getSourceBlockInputStream(
Pipe DirectDictionary<dictionary_key_type>::getSourcePipe(
    const Columns & key_columns [[maybe_unused]],
    const PaddedPODArray<KeyType> & requested_keys [[maybe_unused]]) const
{

@ -96,7 +96,7 @@ public:
    Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;

private:
    Pipe getSourceBlockInputStream(const Columns & key_columns, const PaddedPODArray<KeyType> & requested_keys) const;
    Pipe getSourcePipe(const Columns & key_columns, const PaddedPODArray<KeyType> & requested_keys) const;

    const DictionaryStructure dict_struct;
    const DictionarySourcePtr source_ptr;

@ -697,7 +697,6 @@ void IPAddressDictionary::getItemsImpl(
    const auto & first_column = key_columns.front();
    const size_t rows = first_column->size();

    // special case for getBlockInputStream
    if (unlikely(key_columns.size() == 2))
    {
        getItemsByTwoKeyColumnsImpl<AttributeType>(
@ -3,6 +3,7 @@
#include <Common/logger_useful.h>
#include <Common/escapeForFileName.h>

#include <IO/ConnectionTimeoutsContext.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/SeekAvoidingReadBuffer.h>
#include <IO/ReadHelpers.h>

@ -62,7 +62,7 @@ public:

/** Fast reading data from buffer and save result to memory.
  * Reads at least min_chunk_bytes and some more until the end of the chunk, depends on the format.
  * Used in ParallelParsingBlockInputStream.
  * Used in ParallelParsingInputFormat.
  */
using FileSegmentationEngine = std::function<std::pair<bool, size_t>(
    ReadBuffer & buf,
@ -44,7 +44,7 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_,
{
    istr_concrete = typeid_cast<CompressedReadBufferFromFile *>(&istr);
    if (!istr_concrete)
        throw Exception("When need to use index for NativeBlockInputStream, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR);
        throw Exception("When need to use index for NativeReader, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR);

    if (index_block_it == index_block_end)
        return;
@ -80,7 +80,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co

    if (column->size() != rows)
        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
            "Cannot read all data in NativeBlockInputStream. Rows read: {}. Rows expected: {}", column->size(), rows);
            "Cannot read all data in NativeReader. Rows read: {}. Rows expected: {}", column->size(), rows);
}

@ -35,7 +35,7 @@ NativeWriter::NativeWriter(
{
    ostr_concrete = typeid_cast<CompressedWriteBuffer *>(&ostr);
    if (!ostr_concrete)
        throw Exception("When need to write index for NativeBlockOutputStream, ostr must be CompressedWriteBuffer.", ErrorCodes::LOGICAL_ERROR);
        throw Exception("When need to write index for NativeWriter, ostr must be CompressedWriteBuffer.", ErrorCodes::LOGICAL_ERROR);
}
}

|
||||
{
|
||||
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
|
||||
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
|
||||
wstart = AddTime<kind>::execute(wend, -1 * window_num_units, time_zone);
|
||||
wstart = AddTime<kind>::execute(wend, -window_num_units, time_zone);
|
||||
ToType wend_latest;
|
||||
|
||||
do
|
||||
{
|
||||
wend_latest = wend;
|
||||
wend = AddTime<kind>::execute(wend, -1 * hop_num_units, time_zone);
|
||||
wend = AddTime<kind>::execute(wend, -hop_num_units, time_zone);
|
||||
} while (wend > time_data[i]);
|
||||
|
||||
end_data[i] = wend_latest;
|
||||
start_data[i] = AddTime<kind>::execute(wend_latest, -1 * window_num_units, time_zone);
|
||||
start_data[i] = AddTime<kind>::execute(wend_latest, -window_num_units, time_zone);
|
||||
}
|
||||
MutableColumns result;
|
||||
result.emplace_back(std::move(start));
|
||||
@ -570,7 +570,7 @@ struct TimeWindowImpl<WINDOW_ID>
|
||||
do
|
||||
{
|
||||
wend_latest = wend;
|
||||
wend = AddTime<kind>::execute(wend, -1 * gcd_num_units, time_zone);
|
||||
wend = AddTime<kind>::execute(wend, -gcd_num_units, time_zone);
|
||||
} while (wend > time_data[i]);
|
||||
|
||||
end_data[i] = wend_latest;
|
||||
|
@ -308,7 +308,7 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink,
    {
        ssize_t size = length;
        if (size < 0)
            size += static_cast<ssize_t>(src.getElementSize()) - offset;
            size += offset;

        if (size > 0)
            writeSlice(src.getSliceFromRight(offset, size), sink);
@ -55,7 +55,7 @@ public:
    ~BitReader() = default;

    // reads bits_to_read high-bits from bits_buffer
    inline UInt64 readBits(UInt8 bits_to_read)
    ALWAYS_INLINE inline UInt64 readBits(UInt8 bits_to_read)
    {
        if (bits_to_read > bits_count)
            fillBitBuffer();
@ -71,7 +71,7 @@ public:
        return getBitsFromBitBuffer<PEEK>(8);
    }

    inline UInt8 readBit()
    ALWAYS_INLINE inline UInt8 readBit()
    {
        return static_cast<UInt8>(readBits(1));
    }
@ -122,7 +122,7 @@ private:


    // Fills internal bits_buffer with data from source, reads at most 64 bits
    size_t fillBitBuffer()
    ALWAYS_INLINE size_t fillBitBuffer()
    {
        const size_t available = source_end - source_current;
        const auto bytes_to_read = std::min<size_t>(64 / 8, available);
@ -31,7 +31,6 @@ namespace DB
// because custom S3 implementation may allow relaxed requirements on that.
const int S3_WARN_MAX_PARTS = 10000;


namespace ErrorCodes
{
    extern const int S3_ERROR;
@ -1194,9 +1194,18 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl(
    for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
    {
        if (inst->offsets)
            inst->batch_that->addBatchSinglePlaceFromInterval(inst->offsets[row_begin], inst->offsets[row_end - 1], res + inst->state_offset, inst->batch_arguments, arena);
            inst->batch_that->addBatchSinglePlaceFromInterval(
                inst->offsets[static_cast<ssize_t>(row_begin) - 1],
                inst->offsets[row_end - 1],
                res + inst->state_offset,
                inst->batch_arguments, arena);
        else
            inst->batch_that->addBatchSinglePlaceFromInterval(row_begin, row_end, res + inst->state_offset, inst->batch_arguments, arena);
            inst->batch_that->addBatchSinglePlaceFromInterval(
                row_begin,
                row_end,
                res + inst->state_offset,
                inst->batch_arguments,
                arena);
    }
}

@ -616,7 +616,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
    for (const auto & [_, cache_data] : caches)
    {
        new_values["FilesystemCacheBytes"] = cache_data.cache->getUsedCacheSize();
        new_values["FilesystemCacheFiles"] = cache_data.cache->getCacheFilesNum();
        new_values["FilesystemCacheFiles"] = cache_data.cache->getFileSegmentsNum();
    }
}

@ -37,8 +37,8 @@ class IStreamFactory;
ContextMutablePtr updateSettingsForCluster(
    const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log = nullptr);

/// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
/// `stream_factory` object encapsulates the logic of creating streams for a different type of query
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
/// `stream_factory` object encapsulates the logic of creating plans for a different type of query
/// (currently SELECT, DESCRIBE).
void executeQuery(
    QueryPlan & query_plan,
206
src/Interpreters/ConcurrentHashJoin.cpp
Normal file
206
src/Interpreters/ConcurrentHashJoin.cpp
Normal file
@ -0,0 +1,206 @@
#include <memory>
#include <mutex>
#include <Columns/FilterDescription.h>
#include <Columns/IColumn.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/createBlockSelector.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/parseQuery.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int SET_SIZE_LIMIT_EXCEEDED;
    extern const int BAD_ARGUMENTS;
}
namespace JoinStuff
{
ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_)
    : context(context_)
    , table_join(table_join_)
    , slots(slots_)
{
    if (!slots_ || slots_ >= 256)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_);
    }

    for (size_t i = 0; i < slots; ++i)
    {
        auto inner_hash_join = std::make_shared<InternalHashJoin>();
        inner_hash_join->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_);
        hash_joins.emplace_back(std::move(inner_hash_join));
    }

}

bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits)
{
    Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, block);

    std::list<size_t> pending_blocks;
    for (size_t i = 0; i < dispatched_blocks.size(); ++i)
        pending_blocks.emplace_back(i);
    while (!pending_blocks.empty())
    {
        for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();)
        {
            auto & i = *iter;
            auto & hash_join = hash_joins[i];
            auto & dispatched_block = dispatched_blocks[i];
            if (hash_join->mutex.try_lock())
            {
                if (!hash_join->data->addJoinedBlock(dispatched_block, check_limits))
                {
                    hash_join->mutex.unlock();
                    return false;
                }

                hash_join->mutex.unlock();
                iter = pending_blocks.erase(iter);
            }
            else
                iter++;
        }
    }

    if (check_limits)
        return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
    return true;
}

void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & /*not_processed*/)
{
    Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_left, block);
    for (size_t i = 0; i < dispatched_blocks.size(); ++i)
    {
        std::shared_ptr<ExtraBlock> none_extra_block;
        auto & hash_join = hash_joins[i];
        auto & dispatched_block = dispatched_blocks[i];
        hash_join->data->joinBlock(dispatched_block, none_extra_block);
        if (none_extra_block && !none_extra_block->empty())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty");
    }

    block = concatenateBlocks(dispatched_blocks);
}

void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const
{
    hash_joins[0]->data->checkTypesOfKeys(block);
}

void ConcurrentHashJoin::setTotals(const Block & block)
{
    if (block)
    {
        std::lock_guard lock(totals_mutex);
        totals = block;
    }
}

const Block & ConcurrentHashJoin::getTotals() const
{
    return totals;
}

size_t ConcurrentHashJoin::getTotalRowCount() const
{
    size_t res = 0;
    for (const auto & hash_join : hash_joins)
    {
        std::lock_guard lock(hash_join->mutex);
        res += hash_join->data->getTotalRowCount();
    }
    return res;
}

size_t ConcurrentHashJoin::getTotalByteCount() const
{
    size_t res = 0;
    for (const auto & hash_join : hash_joins)
    {
        std::lock_guard lock(hash_join->mutex);
        res += hash_join->data->getTotalByteCount();
    }
    return res;
}

bool ConcurrentHashJoin::alwaysReturnsEmptySet() const
{
    for (const auto & hash_join : hash_joins)
    {
        std::lock_guard lock(hash_join->mutex);
        if (!hash_join->data->alwaysReturnsEmptySet())
            return false;
    }
    return true;
}

std::shared_ptr<NotJoinedBlocks> ConcurrentHashJoin::getNonJoinedBlocks(
    const Block & /*left_sample_block*/, const Block & /*result_sample_block*/, UInt64 /*max_block_size*/) const
{
    if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
        table_join->strictness() == ASTTableJoin::Strictness::Semi ||
        !isRightOrFull(table_join->kind()))
    {
        return {};
    }
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness());
}

Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, const Block & from_block)
{
    Blocks result;

    size_t num_shards = hash_joins.size();
    size_t num_rows = from_block.rows();
    size_t num_cols = from_block.columns();

    ColumnRawPtrs key_cols;
    for (const auto & key_name : key_columns_names)
    {
        key_cols.push_back(from_block.getByName(key_name).column.get());
    }
    IColumn::Selector selector(num_rows);
    for (size_t i = 0; i < num_rows; ++i)
    {
        SipHash hash;
        for (const auto & key_col : key_cols)
        {
            key_col->updateHashWithValue(i, hash);
        }
        selector[i] = hash.get64() % num_shards;
    }

    for (size_t i = 0; i < num_shards; ++i)
    {
        result.emplace_back(from_block.cloneEmpty());
    }

    for (size_t i = 0; i < num_cols; ++i)
    {
        auto dispatched_columns = from_block.getByPosition(i).column->scatter(num_shards, selector);
        assert(result.size() == dispatched_columns.size());
        for (size_t block_index = 0; block_index < num_shards; ++block_index)
        {
            result[block_index].getByPosition(i).column = std::move(dispatched_columns[block_index]);
        }
    }
    return result;
}

}
}
75
src/Interpreters/ConcurrentHashJoin.h
Normal file
75
src/Interpreters/ConcurrentHashJoin.h
Normal file
@ -0,0 +1,75 @@
#pragma once

#include <condition_variable>
#include <memory>
#include <optional>
#include <Core/BackgroundSchedulePool.h>
#include <Functions/FunctionsLogical.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/IJoin.h>
#include <base/defines.h>
#include <base/types.h>
#include <Common/Stopwatch.h>

namespace DB
{
namespace JoinStuff
{
/**
 * Can run addJoinedBlock() in parallel to speed up the join process. In tests, it shows
 * almost linear speedup with the degree of parallelism.
 *
 * The default HashJoin is not thread safe for inserting the right table's rows, so it runs in a single thread. When
 * the right table is large, the join process is too slow.
 *
 * We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
 * corresponding to the HashJoin instances by hashing every row on the join keys. And we guarantee that every HashJoin
 * instance is written by only one thread at a time.
 *
 * When it comes to the left table matching, the blocks from the left table are also split across the HashJoin instances.
 *
 */
class ConcurrentHashJoin : public IJoin
{
public:
    explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false);
    ~ConcurrentHashJoin() override = default;

    const TableJoin & getTableJoin() const override { return *table_join; }
    bool addJoinedBlock(const Block & block, bool check_limits) override;
    void checkTypesOfKeys(const Block & block) const override;
    void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) override;
    void setTotals(const Block & block) override;
    const Block & getTotals() const override;
    size_t getTotalRowCount() const override;
    size_t getTotalByteCount() const override;
    bool alwaysReturnsEmptySet() const override;
    bool supportParallelJoin() const override { return true; }
    std::shared_ptr<NotJoinedBlocks>
    getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;

private:
    struct InternalHashJoin
    {
        std::mutex mutex;
        std::unique_ptr<HashJoin> data;
    };

    ContextPtr context;
    std::shared_ptr<TableJoin> table_join;
    size_t slots;
    std::vector<std::shared_ptr<InternalHashJoin>> hash_joins;

    std::mutex finished_add_joined_blocks_tasks_mutex;
    std::condition_variable finished_add_joined_blocks_tasks_cond;
    std::atomic<UInt32> finished_add_joined_blocks_tasks = 0;

    mutable std::mutex totals_mutex;
    Block totals;

    Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block);

};
}
}
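The class comment above is the heart of the design: rows are scattered to N shards by hashing the join keys, so each underlying HashJoin is only ever written under its own mutex. The same partitioning scheme in miniature, as generic code rather than the ClickHouse API:

#include <functional>
#include <mutex>
#include <vector>

/// N independent containers, each with its own lock; a row always lands in
/// the shard chosen by the hash of its key, so lookups agree with inserts.
class ShardedSet
{
public:
    explicit ShardedSet(size_t num_shards) : shards(num_shards) {}

    void insert(int key)
    {
        auto & shard = shards[std::hash<int>{}(key) % shards.size()];
        std::lock_guard lock(shard.mutex);   // contention is per shard, not global
        shard.keys.push_back(key);
    }

private:
    struct Shard
    {
        std::mutex mutex;
        std::vector<int> keys;
    };
    std::vector<Shard> shards;
};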
@ -383,7 +383,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
        txn->addOp(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
    }

    txn->addOp(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
    txn->addOp(getOpToUpdateLogPointer());

    for (auto & op : ops)
        txn->addOp(std::move(op));
@ -392,6 +392,11 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
    return query_context;
}

Coordination::RequestPtr DatabaseReplicatedTask::getOpToUpdateLogPointer()
{
    return zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1);
}

String DDLTaskBase::getLogEntryName(UInt32 log_entry_number)
{
    return zkutil::getSequentialNodeName("query-", log_entry_number);

@ -107,6 +107,7 @@ struct DDLTaskBase
    virtual String getShardID() const = 0;

    virtual ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper);
    virtual Coordination::RequestPtr getOpToUpdateLogPointer() { return nullptr; }

    inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; }
    inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; }
@ -145,6 +146,7 @@ struct DatabaseReplicatedTask : public DDLTaskBase
    String getShardID() const override;
    void parseQueryFromEntry(ContextPtr context) override;
    ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) override;
    Coordination::RequestPtr getOpToUpdateLogPointer() override;

    DatabaseReplicated * database;
};

@ -180,7 +180,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
    {
        /// What should we do if we cannot even parse the host name and therefore cannot properly submit execution status?
        /// We can try to create the fail node using the FQDN: if it equals the host name in the cluster config, the attempt will be successful.
        /// Otherwise, that node will be ignored by DDLQueryStatusSource.
        out_reason = "Incorrect task format";
        write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
        return {};
@ -715,6 +715,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
            if (zookeeper->exists(is_executed_path, nullptr, event))
            {
                LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
                if (auto op = task.getOpToUpdateLogPointer())
                    task.ops.push_back(op);
                return true;
            }

@ -759,6 +761,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
                {
                    LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by);
                    executed_by_other_leader = true;
                    if (auto op = task.getOpToUpdateLogPointer())
                        task.ops.push_back(op);
                    break;
                }

@ -786,6 +790,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
                {
                    LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
                    executed_by_other_leader = true;
                    if (auto op = task.getOpToUpdateLogPointer())
                        task.ops.push_back(op);
                    break;
                }
                else

@ -91,7 +91,7 @@ protected:
    /// Executes query only on leader replica in case of replicated table.
    /// Queries like TRUNCATE/ALTER .../OPTIMIZE have to be executed only on one node of shard.
    /// Most of these queries can be executed on non-leader replica, but actually they still send
    /// query via RemoteBlockOutputStream to leader, so to avoid such "2-phase" query execution we
    /// query via RemoteQueryExecutor to leader, so to avoid such "2-phase" query execution we
    /// execute query directly on leader.
    bool tryExecuteQueryOnLeaderReplica(
        DDLTaskBase & task,
@ -1,3 +1,4 @@
#include <memory>
#include <Core/Block.h>

#include <Parsers/ASTExpressionList.h>
@ -16,6 +17,7 @@

#include <Interpreters/ArrayJoinAction.h>
#include <Interpreters/Context.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/DictionaryReader.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/ExpressionActions.h>
@ -934,7 +936,13 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> ana

    bool allow_merge_join = analyzed_join->allowMergeJoin();
    if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
    {
        if (analyzed_join->allowParallelHashJoin())
        {
            return std::make_shared<JoinStuff::ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, sample_block);
        }
        return std::make_shared<HashJoin>(analyzed_join, sample_block);
    }
    else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
        return std::make_shared<MergeJoin>(analyzed_join, sample_block);
    return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);
|
||||
|
||||
/** Helps to implement modifier WITH FILL for ORDER BY clause.
|
||||
* Stores row as array of fields and provides functions to generate next row for filling gaps and for comparing rows.
|
||||
* Used in FillingBlockInputStream and in FillingTransform.
|
||||
* Used in FillingTransform.
|
||||
*/
|
||||
class FillingRow
|
||||
{
|
||||
|
@ -45,6 +45,9 @@ public:
    /// Different query plan is used for such joins.
    virtual bool isFilled() const { return false; }

    /// Whether FillingRightJoinSideTransform can run in parallel for this join.
    virtual bool supportParallelJoin() const { return false; }

    virtual std::shared_ptr<NotJoinedBlocks>
    getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
};
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Access/Common/AccessFlags.h>
|
||||
#include <Access/EnabledQuota.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Processors/Transforms/buildPushingToViewsChain.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
@ -153,7 +154,18 @@ Block InterpreterInsertQuery::getSampleBlock(
|
||||
return res;
|
||||
}
|
||||
|
||||
static bool hasAggregateFunctions(const IAST * ast)
|
||||
{
|
||||
if (const auto * func = typeid_cast<const ASTFunction *>(ast))
|
||||
if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name))
|
||||
return true;
|
||||
|
||||
for (const auto & child : ast->children)
|
||||
if (hasAggregateFunctions(child.get()))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
/** A query that just reads all data without any complex computations or filetering.
|
||||
* If we just pipe the result to INSERT, we don't have to use too many threads for read.
|
||||
*/
|
||||
@ -186,7 +198,8 @@ static bool isTrivialSelect(const ASTPtr & select)
        && !select_query->groupBy()
        && !select_query->having()
        && !select_query->orderBy()
        && !select_query->limitBy());
        && !select_query->limitBy()
        && !hasAggregateFunctions(select_query));
    }
    /// This query is ASTSelectWithUnionQuery subquery
    return false;
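The extra predicate closes a hole in the fast path: a query such as "SELECT count() FROM t" has no GROUP BY clause, yet it still aggregates, so piping it straight into the INSERT as a "trivial" read would be wrong. A toy standalone sketch of the recursive check (stand-in AST types; the real code asks AggregateFunctionFactory):

#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
    std::vector<std::shared_ptr<Node>> children;
};

bool isAggregateName(const std::string & name)
{
    return name == "count" || name == "sum";   // toy stand-in for the factory lookup
}

bool hasAggregates(const Node & node)
{
    if (isAggregateName(node.name))
        return true;
    for (const auto & child : node.children)
        if (child && hasAggregates(*child))
            return true;
    return false;
}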
@ -396,7 +409,7 @@ BlockIO InterpreterInsertQuery::execute()
    for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
    {
        /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
        /// default column values (in AddingDefaultBlockOutputStream), so all values will be cast correctly.
        /// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
        if (input_columns[col_idx].type->isNullable() && !query_columns[col_idx].type->isNullable() && output_columns.hasDefault(query_columns[col_idx].name))
            query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name));
    }
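Comment rename aside, the logic here is worth spelling out: the sample column is widened to Nullable so incoming NULLs survive long enough for the defaults pass to replace them with the column default before the final cast. A toy sketch of that substitution, using std::optional in place of a Nullable column (stand-in code, not the real transform):

#include <optional>
#include <vector>

// NULLs arrive as empty optionals; the defaults pass fills them in before the
// values are cast to the target type, mirroring what AddingDefaultsTransform does.
std::vector<int> applyDefaults(const std::vector<std::optional<int>> & column, int default_value)
{
    std::vector<int> result;
    result.reserve(column.size());
    for (const auto & value : column)
        result.push_back(value.value_or(default_value));
    return result;
}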
@ -1280,7 +1280,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
        query_plan.getCurrentDataStream(),
        joined_plan->getCurrentDataStream(),
        expressions.join,
        settings.max_block_size);
        settings.max_block_size,
        max_streams,
        analysis_result.optimize_read_in_order);

    join_step->setStepDescription("JOIN");
    std::vector<QueryPlanPtr> plans;
@ -748,4 +748,15 @@ void TableJoin::resetToCross()
    this->table_join.kind = ASTTableJoin::Kind::Cross;
}

bool TableJoin::allowParallelHashJoin() const
{
    if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH)
        return false;
    if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner)
        return false;
    if (isSpecialStorage() || !oneDisjunct())
        return false;
    return true;
}

}
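allowParallelHashJoin() gates the new algorithm to the cases it can handle: the parallel_hash algorithm must be selected, the join must be LEFT or INNER, and dictionary joins, special storages, and multi-disjunct ON clauses are excluded. The payoff is the core idea behind ConcurrentHashJoin: shard rows by key hash into independent hash tables, so several threads can build the right side without sharing a lock. A hedged standalone sketch of that sharding (stand-in code, not the real class):

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

struct ShardedJoinTable
{
    explicit ShardedJoinTable(size_t num_shards) : shards(num_shards) {}

    // Each shard owns a disjoint slice of the key space, so two threads
    // inserting into different shards never contend.
    void insert(const std::string & key, size_t row_number)
    {
        auto & shard = shards[std::hash<std::string>{}(key) % shards.size()];
        shard.emplace(key, row_number);
    }

    std::vector<std::unordered_multimap<std::string, size_t>> shards;
};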
@ -191,10 +191,11 @@ public:
    bool allowMergeJoin() const;
    bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; }
    bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; }
    bool allowParallelHashJoin() const;
    bool forceHashJoin() const
    {
        /// HashJoin is always used for DictJoin.
        return dictionary_reader || join_algorithm == JoinAlgorithm::HASH;
        return dictionary_reader || join_algorithm == JoinAlgorithm::HASH || join_algorithm == JoinAlgorithm::PARALLEL_HASH;
    }

    bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
@ -144,7 +144,7 @@ bool VersionMetadata::isRemovalTIDLocked() const

void VersionMetadata::setCreationTID(const TransactionID & tid, TransactionInfoContext * context)
{
    /// NOTE ReplicatedMergeTreeBlockOutputStream may add one part multiple times
    /// NOTE ReplicatedMergeTreeSink may add one part multiple times
    assert(creation_tid.isEmpty() || creation_tid == tid);
    creation_tid = tid;
    if (context)
@ -275,7 +275,7 @@ DDLQueryStatusSource::DDLQueryStatusSource(
    , node_path(zk_node_path)
    , context(context_)
    , watch(CLOCK_MONOTONIC_COARSE)
    , log(&Poco::Logger::get("DDLQueryStatusInputStream"))
    , log(&Poco::Logger::get("DDLQueryStatusSource"))
{
    auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
@ -821,7 +821,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
    element.profile_counters = info.profile_counters;

    /// We need to refresh the access info since dependent views might have added extra information, either during
    /// creation of the view (PushingToViewsBlockOutputStream) or while executing its internal SELECT
    /// creation of the view (PushingToViews chain) or while executing its internal SELECT
    const auto & access_info = context_ptr->getQueryAccessInfo();
    element.query_databases.insert(access_info.databases.begin(), access_info.databases.end());
    element.query_tables.insert(access_info.tables.begin(), access_info.tables.end());
@ -181,6 +181,10 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
        else if (!disk.empty())
            print_identifier(disk);
    }
    else if (type == Type::SYNC_DATABASE_REPLICA)
    {
        print_identifier(database->as<ASTIdentifier>()->name());
    }
    else if (type == Type::DROP_REPLICA)
    {
        print_drop_replica();
@ -247,6 +247,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &

        case Type::SYNC_DATABASE_REPLICA:
        {
            parseQueryWithOnCluster(res, pos, expected);
            if (!parseDatabaseAsAST(pos, expected, res->database))
                return false;
            break;
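The parser change mirrors the formatting change above it: for SYSTEM SYNC DATABASE REPLICA, the optional ON CLUSTER clause is consumed first and a database identifier must follow, otherwise the parse fails. A toy token-level sketch of that order (stand-in code, far simpler than the real IParser machinery):

#include <sstream>
#include <string>

bool parseSyncDatabaseReplica(std::istringstream & in, std::string & database, std::string & cluster)
{
    std::string tok;
    if (!(in >> tok))
        return false;              // a database name is mandatory
    if (tok == "ON")               // optional ON CLUSTER clause comes first
    {
        if (!(in >> tok) || tok != "CLUSTER" || !(in >> cluster))
            return false;
        if (!(in >> tok))          // still need the database after the clause
            return false;
    }
    database = tok;
    return true;
}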
@ -637,7 +637,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c
    for (auto & desc : def.columns_to_aggregate)
    {
        if (!desc.created)
            throw Exception("Logical error in SummingSortedBlockInputStream, there are no description",
            throw Exception("Logical error in SummingSortedAlgorithm, there is no description",
                ErrorCodes::LOGICAL_ERROR);

        if (desc.is_agg_func_type)
@ -131,7 +131,7 @@ IProcessor::Status IMergingTransformBase::prepare()
        return Status::Finished;
    }

    /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before.
    /// Do not disable inputs, so they can be executed in parallel.
    bool is_port_full = !output.canPush();

    /// Push if has data.
@ -170,8 +170,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
    }
    else
    {
        pipeline.resize(1);

        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<AggregatingTransform>(header, transform_params);
@ -15,9 +15,10 @@ JoinStep::JoinStep(
    const DataStream & left_stream_,
    const DataStream & right_stream_,
    JoinPtr join_,
    size_t max_block_size_)
    : join(std::move(join_))
    , max_block_size(max_block_size_)
    size_t max_block_size_,
    size_t max_streams_,
    bool keep_left_read_in_order_)
    : join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
{
    input_streams = {left_stream_, right_stream_};
    output_stream = DataStream

@ -31,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
    if (pipelines.size() != 2)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expects two input steps");

    return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors);
    return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, max_streams, keep_left_read_in_order, &processors);
}

void JoinStep::describePipeline(FormatSettings & settings) const
@ -16,7 +16,9 @@ public:
        const DataStream & left_stream_,
        const DataStream & right_stream_,
        JoinPtr join_,
        size_t max_block_size_);
        size_t max_block_size_,
        size_t max_streams_,
        bool keep_left_read_in_order_);

    String getName() const override { return "Join"; }

@ -29,6 +31,8 @@ public:
private:
    JoinPtr join;
    size_t max_block_size;
    size_t max_streams;
    bool keep_left_read_in_order;
    Processors processors;
};
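Together, the three JoinStep hunks thread two new knobs from the interpreter down to QueryPipelineBuilder::joinPipelines: max_streams bounds how wide the joined pipeline may run, and keep_left_read_in_order asks the builder not to reshuffle the left side when a read-in-order optimization depends on it. One plausible shape of that decision, sketched with stand-in code (this is not the actual builder logic):

#include <algorithm>
#include <cstddef>

struct JoinStepSketch
{
    size_t max_block_size = 65536;
    size_t max_streams = 8;                  // upper bound on parallel join outputs
    bool keep_left_read_in_order = false;    // preserve left-side ordering if requested

    size_t planOutputStreams(size_t left_streams, bool join_supports_parallel) const
    {
        if (!join_supports_parallel || keep_left_read_in_order)
            return 1;                        // conservative: a single joining stream
        return std::min(left_streams, max_streams);
    }
};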
@ -7,7 +7,7 @@ namespace DB
{

/** A stream of blocks from which you can read the next block from an explicitly provided list.
  * Also see OneBlockInputStream.
  * Also see SourceFromSingleChunk.
  */
class BlocksListSource : public SourceWithProgress
{
@ -56,7 +56,7 @@ MySQLSource::MySQLSource(
    const Block & sample_block,
    const StreamSettings & settings_)
    : SourceWithProgress(sample_block.cloneEmpty())
    , log(&Poco::Logger::get("MySQLBlockInputStream"))
    , log(&Poco::Logger::get("MySQLSource"))
    , connection{std::make_unique<Connection>(entry, query_str)}
    , settings{std::make_unique<StreamSettings>(settings_)}
{

@ -64,10 +64,10 @@ MySQLSource::MySQLSource(
    initPositionMappingFromQueryResultStructure();
}

/// For descendant MySQLWithFailoverBlockInputStream
/// For descendant MySQLWithFailoverSource
MySQLSource::MySQLSource(const Block &sample_block_, const StreamSettings & settings_)
    : SourceWithProgress(sample_block_.cloneEmpty())
    , log(&Poco::Logger::get("MySQLBlockInputStream"))
    , log(&Poco::Logger::get("MySQLSource"))
    , settings(std::make_unique<StreamSettings>(settings_))
{
    description.init(sample_block_);