Merge master

kssenii 2022-05-10 19:31:22 +02:00
commit 61f2737e17
794 changed files with 4111 additions and 157409 deletions

View File

@ -1,7 +1,7 @@
### Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
- Bug Fix (user-visible misbehavior in official stable or prestable release)
- Performance Improvement
- Backward Incompatible Change
- Build/Testing/Packaging Improvement

View File

@ -1,48 +0,0 @@
name: "CodeQL"
"on":
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
env:
CC: clang-14
CXX: clang++-14
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ['cpp']
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'true'
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
- name: Build
run: |
sudo apt-get install -yq ninja-build
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
mkdir build
cd build
cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
ninja
rm -rf ../contrib
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
PARAMS="--host ... --secure --password ..."
if [ -x ./clickhouse ]
then
CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
CLICKHOUSE_CLIENT="clickhouse-client"
else
echo "clickhouse-client is not found"
exit 1
fi
QUERY_ID_PREFIX="benchmark_$RANDOM"
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
do
for i in $(seq 1 $TRIES)
do
QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}"
${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query"
echo -n '.'
done
QUERY_NUM=$((QUERY_NUM + 1))
echo
done
sleep 10
${CLICKHOUSE_CLIENT} ${PARAMS} --query "
WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
FROM clusterAllReplicas(default, system.query_log)
WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
GROUP BY num ORDER BY num FORMAT TSV
"

View File

@ -364,10 +364,8 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZLIB)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
if (ARROW_WITH_ZSTD)
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
endif ()
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
add_library(_arrow ${ARROW_SRCS})
@ -383,7 +381,6 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::zstd
)
target_link_libraries(_arrow PUBLIC _orc)

View File

@ -101,7 +101,12 @@ EOL
function stop()
{
clickhouse stop
clickhouse stop --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hung; let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
sleep 5
gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||:
clickhouse stop --force
}
function start()
@ -201,7 +206,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
@ -387,7 +392,7 @@ for table in query_log trace_log; do
done
# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore)

View File

@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted"
@ -25,14 +25,14 @@ def process_test_log(log_path):
success = 0
hung = False
retries = False
task_timeout = True
success_finish = False
test_results = []
with open(log_path, "r") as test_file:
for line in test_file:
original_line = line
line = line.strip()
if any(s in line for s in NO_TASK_TIMEOUT_SIGNS):
task_timeout = False
if any(s in line for s in SUCCESS_FINISH_SIGNS):
success_finish = True
if HUNG_SIGN in line:
hung = True
if RETRIES_SIGN in line:
@ -81,7 +81,7 @@ def process_test_log(log_path):
failed,
success,
hung,
task_timeout,
success_finish,
retries,
test_results,
)
@ -108,7 +108,7 @@ def process_result(result_path):
failed,
success,
hung,
task_timeout,
success_finish,
retries,
test_results,
) = process_test_log(result_path)
@ -123,10 +123,10 @@ def process_result(result_path):
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0", ""))
elif task_timeout:
description = "Timeout, "
elif not success_finish:
description = "Tests are not finished, "
state = "failure"
test_results.append(("Timeout", "FAIL", "0", ""))
test_results.append(("Tests are not finished", "FAIL", "0", ""))
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0", ""))

View File

@ -694,6 +694,49 @@ auto s = std::string{"Hello"};
**2.** Exception specifiers from C++03 are not used.
**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.
```
// Traditional way without syntactic sugar
template <typename G, typename = std::enable_if_t<std::is_same<G, F>::value, void>> // SFINAE via std::enable_if, usage of ::value
std::pair<int, int> func(const E<G> & e) // explicitly specified return type
{
if (elements.count(e)) // .count() membership test
{
// ...
}
elements.erase(
std::remove_if(
elements.begin(), elements.end(),
[&](const auto x){
return x == 1;
}),
elements.end()); // remove-erase idiom
return std::make_pair(1, 2); // create pair via make_pair()
}
// With syntactic sugar (C++14/17/20)
template <typename G>
requires std::same_as<G, F> // SFINAE via a C++20 requires-clause with the std::same_as concept
auto func(const E<G> & e) // auto return type (C++14)
{
if (elements.contains(e)) // C++20 .contains membership test
{
// ...
}
std::erase_if(
elements,
[&](const auto x){
return x == 1;
}); // C++20 std::erase_if
return {1, 2}; // or: return std::pair(1, 2); // create pair via initialization list or value initialization (C++17)
}
```
## Platform {#platform}
**1.** We write code for a specific platform.

View File

@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
# for hits_v1
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# for hits_100m_obfuscated
clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# import data
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000

View File

@ -9,66 +9,66 @@ ClickHouse can accept and return data in various formats. A format supported for
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
The supported formats are:
| Input | Output |
|-------------------------------------------------------------------------------------------|-------|-------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Prometheus](#prometheus) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
| Format | Input | Output |
|-------------------------------------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Prometheus](#prometheus) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.

View File

@ -426,7 +426,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — use with `static` type, response content sent to the client; when using the prefix `file://` or `config://`, the content is taken from the file or configuration and sent to the client.

View File

@ -0,0 +1,57 @@
---
sidebar_position: 20
sidebar_label: PostgreSQL Interface
---
# PostgreSQL Interface
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance, allowing you to connect PostgreSQL client applications that are not already directly supported by ClickHouse (for example, Amazon Redshift).
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
```xml
<clickhouse>
<postgresql_port>9005</postgresql_port>
</clickhouse>
```
Start up your ClickHouse server and look for a log message similar to the following that mentions **Listening for PostgreSQL compatibility protocol**:
```response
{} <Information> Application: Listening for PostgreSQL compatibility protocol: 127.0.0.1:9005
```
## Connect psql to ClickHouse
The following command demonstrates how to connect the PostgreSQL client `psql` to ClickHouse:
```bash
psql -p [port] -h [hostname] -U [username] [database_name]
```
For example:
```bash
psql -p 9005 -h 127.0.0.1 -U alice default
```
:::note
The `psql` client requires a login with a password, so you will not be able to connect using the `default` user with no password. Either assign a password to the `default` user, or log in as a different user.
:::
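For example, a minimal sketch of creating such a user on the ClickHouse side (the user name, password, and grant are illustrative assumptions, not taken from the original article):
``` sql
-- Hypothetical user for psql logins; adjust the name, password, and grants to your setup.
CREATE USER alice IDENTIFIED WITH sha256_password BY 'my_secret_password';
GRANT SELECT ON default.* TO alice;
```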
The `psql` client prompts for the password:
```response
Password for user alice:
psql (14.2, server 22.3.1.1)
WARNING: psql major version 14, server major version 22.
Some psql features might not work.
Type "help" for help.
default=>
```
And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse.
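For example, any ordinary ClickHouse SQL can be issued from the `psql` prompt; a trivial sketch (output omitted):
``` sql
-- Runs on ClickHouse even though it is typed into a PostgreSQL client.
SELECT currentUser(), version();
```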
[Original article](https://clickhouse.com/docs/en/interfaces/postgresql)

View File

@ -1,48 +0,0 @@
---
sidebar_position: 108
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Parameters**
- `N` — The number of elements to return. If the parameter is omitted, the default value of 10 is used.
**Arguments**
- `column` — The value.
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Or the last 10:
``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90] │
└──────────────────────────────────────────────┘
```

View File

@ -620,9 +620,9 @@ arraySlice(array, offset[, length])
**Arguments**
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
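To make the offset and length semantics concrete, a small illustration (the queries and expected results are my own sketch based on the description above):
``` sql
-- Positive length: three elements starting from position 2.
SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;   -- [2, NULL, 4]
-- Negative length: from position 2 up to, but not including, the last element.
SELECT arraySlice([1, 2, NULL, 4, 5], 2, -1) AS res;  -- [2, NULL, 4]
```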
**Example**

View File

@ -130,13 +130,9 @@ bitSlice(s, offset[, length])
**Arguments**
- `s` — s is [String](../../sql-reference/data-types/string.md)
or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
indent on the right. Numbering of the bits begins with 1.
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index in bits. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1.
- `length` — The length of the substring in bits. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds the length of `s`, it is truncated. If length is not a multiple of 8, zeros are padded on the right.
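As a rough illustration of the bit-level addressing (a sketch based on the description above; the expected outputs are assumptions, not copied from the original page):
``` sql
-- The first 8 bits of 'Hello' are its first byte.
SELECT bitSlice('Hello', 1, 8) AS first_byte;   -- expected: 'H'
-- Starting at bit 9, the next 8 bits are the second byte.
SELECT bitSlice('Hello', 9, 8) AS second_byte;  -- expected: 'e'
```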
**Returned value**

View File

@ -478,3 +478,17 @@ Result:
│ 0 │
└──────────────────────────────────────────────┘
```
Query:
``` sql
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
```
Result:
``` text
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```

View File

@ -480,7 +480,7 @@ Result:
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
Returns a substring starting with the byte from the offset index that is length bytes long. Character indexing starts from one (as in standard SQL). The offset and length arguments must be constants.
Returns a substring starting with the byte from the offset index that is length bytes long. Character indexing starts from one (as in standard SQL).
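For instance, a quick sketch of the 1-based byte indexing (example of my own, not from the original page):
``` sql
-- Offset 2 skips the first byte, length 5 takes the next five bytes.
SELECT substring('ClickHouse', 2, 5) AS s;  -- 'lickH'
```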
## substringUTF8(s, offset, length) {#substringutf8}

View File

@ -410,7 +410,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
- `status` — used with the `static` type, returns the response status code.
- `content_type` — used with the `static` type, returns the response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `content_type` — used with any type, returns the response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — used with the `static` type, the response content sent to the client; when using the `file://` or `config://` prefix, the content is taken from the file or configuration and sent to the client.

View File

@ -31,7 +31,5 @@ sidebar_label: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
- `UInt16` — \[0 : 65535\]
- `UInt32` — \[0 : 4294967295\]
- `UInt64` — \[0 : 18446744073709551615\]
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
`UInt128` is not implemented yet.

View File

@ -21,7 +21,7 @@ LowCardinality(data_type)
`LowCardinality` is a superstructure that changes the storage method and processing rules of the data. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality` columns. Working with dictionary-encoded data can significantly increase the performance of [SELECT](../statements/select/index.md) queries for many applications.
The efficiency of the `LowCarditality` data type depends on data diversity. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of data reading and storage. If a dictionary contains more than 100,000 distinct values, ClickHouse may perform worse than with ordinary data types.
The efficiency of the `LowCardinality` data type depends on data diversity. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of data reading and storage. If a dictionary contains more than 100,000 distinct values, ClickHouse may perform worse than with ordinary data types.
When working with strings, using `LowCardinality` instead of [Enum](enum.md) provides more flexibility and often shows the same or higher efficiency.
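For example, a minimal sketch of declaring a dictionary-encoded column (table and column names are illustrative):
``` sql
-- A string column with few distinct values is a good LowCardinality candidate.
CREATE TABLE lc_demo
(
    id UInt64,
    status LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY id;
```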

View File

@ -574,9 +574,9 @@ arraySlice(array, offset[, length])
**Arguments**
- `array` – The data array.
- `offset` – Indent from the edge of the array. A positive value means an offset on the left, a negative value means an indent on the right. Array items are numbered starting from 1.
- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `array` – The data array.
- `offset` – Indent from the edge of the array. A positive value means an offset on the left, a negative value means an indent on the right. Array items are numbered starting from 1.
- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**

View File

@ -446,3 +446,17 @@ SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16');
│ 0 │
└──────────────────────────────────────────────┘
```
Query:
``` sql
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
```
Result:
``` text
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```

View File

@ -157,6 +157,7 @@ def build(args):
if not args.skip_website:
website.process_benchmark_results(args)
website.minify_website(args)
redirects.build_static_redirects(args)

View File

@ -1,7 +1,10 @@
import hashlib
import json
import logging
import os
import shutil
import subprocess
import bs4
import util
@ -178,6 +181,59 @@ def build_website(args):
f.write(content.encode("utf-8"))
def get_css_in(args):
return [
f"'{args.website_dir}/css/bootstrap.css'",
f"'{args.website_dir}/css/docsearch.css'",
f"'{args.website_dir}/css/base.css'",
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'",
]
def get_js_in(args):
return [
f"'{args.website_dir}/js/jquery.js'",
f"'{args.website_dir}/js/popper.js'",
f"'{args.website_dir}/js/bootstrap.js'",
f"'{args.website_dir}/js/sentry.js'",
f"'{args.website_dir}/js/base.js'",
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'",
]
def minify_website(args):
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {

View File

@ -397,9 +397,9 @@ SELECT arrayPushFront(['b'], 'a') AS res
**Arguments**
- `array` – The array.
- `offset` – Offset into the array. A positive value means an offset on the left, a negative value means an indent on the right. Array indexes start from 1.
- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `array` – The array.
- `offset` – Offset into the array. A positive value means an offset on the left, a negative value means an indent on the right. Array indexes start from 1.
- `length` – The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**

View File

@ -4,6 +4,7 @@
#include <iostream>
#include <iomanip>
#include <optional>
#include <string_view>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -48,6 +49,7 @@
#endif
namespace fs = std::filesystem;
using namespace std::literals;
namespace DB
@ -1038,6 +1040,158 @@ void Client::processConfig()
client_info.quota_key = config().getString("quota_key", "");
}
void Client::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
}

View File

@ -36,6 +36,13 @@ protected:
void processConfig() override;
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) override;
private:
void printChangedSettings() const;
std::vector<String> loadWarningMessages();

View File

@ -68,6 +68,7 @@ namespace ErrorCodes
extern const int NOT_ENOUGH_SPACE;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_KILL;
extern const int BAD_ARGUMENTS;
}
}
@ -1062,8 +1063,11 @@ namespace
return pid;
}
int stop(const fs::path & pid_file, bool force)
int stop(const fs::path & pid_file, bool force, bool do_not_kill)
{
if (force && do_not_kill)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible");
UInt64 pid = isRunning(pid_file);
if (!pid)
@ -1092,9 +1096,15 @@ namespace
if (try_num == num_tries)
{
fmt::print("Will terminate forcefully.\n", pid);
if (do_not_kill)
{
fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid);
return 1;
}
fmt::print("Will terminate forcefully (pid = {}).\n", pid);
if (0 == kill(pid, 9))
fmt::print("Sent kill signal.\n", pid);
fmt::print("Sent kill signal (pid = {}).\n", pid);
else
throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR);
@ -1175,6 +1185,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("force", po::bool_switch(), "Stop with KILL signal instead of TERM")
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
;
po::variables_map options;
@ -1189,7 +1200,9 @@ int mainEntryClickHouseStop(int argc, char ** argv)
fs::path prefix = options["prefix"].as<std::string>();
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
return stop(pid_file, options["force"].as<bool>());
bool force = options["force"].as<bool>();
bool do_not_kill = options["do-not-kill"].as<bool>();
return stop(pid_file, force, do_not_kill);
}
catch (...)
{
@ -1247,6 +1260,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
;
po::variables_map options;
@ -1265,7 +1279,9 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
fs::path config = prefix / options["config-path"].as<std::string>() / "config.xml";
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
if (int res = stop(pid_file, options["force"].as<bool>()))
bool force = options["force"].as<bool>();
bool do_not_kill = options["do-not-kill"].as<bool>();
if (int res = stop(pid_file, force, do_not_kill))
return res;
return start(user, executable, config, pid_file);

View File

@ -738,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
{
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
const char * arg = argv[arg_num];
common_arguments.emplace_back(arg);
}
}
}
#pragma GCC diagnostic ignored "-Wunused-function"

View File

@ -45,6 +45,8 @@ protected:
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;

View File

@ -540,7 +540,7 @@ static void sanityChecks(Server & server)
try
{
if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
server.context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
}
catch (...)
{
@ -558,7 +558,7 @@ static void sanityChecks(Server & server)
try
{
if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
server.context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
}
catch (...)
{
@ -1088,11 +1088,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
if (config->has("global_memory_usage_overcommit_max_wait_microseconds"))
{
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0);
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
}
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 200);
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
@ -1294,17 +1291,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Listening for {}", server.getDescription());
}
auto & access_control = global_context->getAccessControl();
if (config().has("custom_settings_prefixes"))
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
/// Initialize access storages.
auto & access_control = global_context->getAccessControl();
try
{
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
}
catch (...)
{

View File

@ -545,6 +545,14 @@
-->
</user_directories>
<access_control_improvements>
<!-- Enables logic so that users without permissive row policies can still read rows using a SELECT query.
For example, if there are two users A and B, and a row policy is defined only for A, then
if this setting is true, user B will see all rows, and if this setting is false, user B will see no rows.
By default this setting is false for compatibility with earlier access configurations. -->
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
</access_control_improvements>
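To see how this interacts with row policies, consider a hypothetical policy defined for only one user (names and the filter are illustrative): with `users_without_row_policies_can_read_rows` left at `false`, a user without any policy on the table sees no rows; with it set to `true`, such a user sees all rows.
``` sql
-- Policy only for user A; what user B sees on mydb.mytable is decided by the setting above.
CREATE ROW POLICY policy_a ON mydb.mytable FOR SELECT USING user_id = 42 TO A;
```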
<!-- Default profile of settings. -->
<default_profile>default</default_profile>

View File

@ -129,8 +129,8 @@
#query_div
{
/* Make enough space for even huge queries. */
height: 20%;
/* Make enough space for medium/large queries while allowing the query textarea to grow. */
min-height: 20%;
}
#query
@ -748,7 +748,7 @@
const max_rows = 10000 / response.meta.length;
let row_num = 0;
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);

View File

@ -6,9 +6,6 @@
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<!-- How to choose between replicas during distributed query processing.
random - choose random replica from set of replicas with minimum number of errors
nearest_hostname - from set of replicas with minimum number of errors, choose replica

View File

@ -149,6 +149,24 @@ AccessControl::AccessControl()
AccessControl::~AccessControl() = default;
void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
if (config_.has("custom_settings_prefixes"))
setCustomSettingsPrefixes(config_.getString("custom_settings_prefixes"));
setNoPasswordAllowed(config_.getBool("allow_no_password", true));
setPlaintextPasswordAllowed(config_.getBool("allow_plaintext_password", true));
setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool(
"access_control_improvements.users_without_row_policies_can_read_rows",
false /* false because we need to be compatible with earlier access configurations */));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
}
void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
auto storages = getStoragesPtr();
@ -170,11 +188,7 @@ void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguratio
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
@ -207,11 +221,7 @@ void AccessControl::addUsersConfigStorage(
return;
}
}
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());

View File

@ -50,6 +50,9 @@ public:
AccessControl();
~AccessControl() override;
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_);
/// Parses access entities from a configuration loaded from users.xml.
/// This function adds a UsersConfigAccessStorage if it wasn't added before.
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
@ -122,6 +125,12 @@ public:
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
bool isPlaintextPasswordAllowed() const;
/// Enables logic so that users without permissive row policies can still read rows using a SELECT query.
/// For example, if there are two users A and B, and a row policy is defined only for A, then
/// if this setting is true, user B will see all rows, and if this setting is false, user B will see no rows.
void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
@ -178,6 +187,7 @@ private:
std::unique_ptr<CustomSettingsPrefixes> custom_settings_prefixes;
std::atomic_bool allow_plaintext_password = true;
std::atomic_bool allow_no_password = true;
std::atomic_bool users_without_row_policies_can_read_rows = false;
};
}

View File

@ -28,17 +28,25 @@ namespace
permissions.push_back(filter);
}
ASTPtr getResult() &&
ASTPtr getResult(bool users_without_row_policies_can_read_rows) &&
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
if (!permissions.empty() || !users_without_row_policies_can_read_rows)
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
}
/// Process restrictive filters.
auto result = makeASTForLogicalAnd(std::move(restrictions));
ASTPtr result;
if (!restrictions.empty())
result = makeASTForLogicalAnd(std::move(restrictions));
bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
if (result)
{
bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
}
return result;
}
@ -234,7 +242,7 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
{
auto & mixed_filter = (*mixed_filters)[key];
mixed_filter.database_and_table_name = mixer.database_and_table_name;
mixed_filter.ast = std::move(mixer.mixer).getResult();
mixed_filter.ast = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
}
enabled.mixed_filters.store(mixed_filters);

View File

@ -3,6 +3,7 @@
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/SettingsProfile.h>
#include <Access/AccessControl.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
@ -339,7 +340,7 @@ namespace
}
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config)
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows)
{
std::map<std::pair<String /* database */, String /* table */>, std::unordered_map<String /* user */, String /* filter */>> all_filters_map;
@ -395,8 +396,19 @@ namespace
const auto & [database, table_name] = database_and_table_name;
for (const String & user_name : user_names)
{
String filter;
auto it = user_to_filters.find(user_name);
String filter = (it != user_to_filters.end()) ? it->second : "1";
if (it != user_to_filters.end())
{
filter = it->second;
}
else
{
if (users_without_row_policies_can_read_rows)
continue;
else
filter = "1";
}
auto policy = std::make_shared<RowPolicy>();
policy->setFullName(user_name, database, table_name);
@ -411,7 +423,7 @@ namespace
SettingsProfileElements parseSettingsConstraints(const Poco::Util::AbstractConfiguration & config,
const String & path_to_constraints,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
SettingsProfileElements profile_elements;
Poco::Util::AbstractConfiguration::Keys keys;
@ -419,8 +431,7 @@ namespace
for (const String & setting_name : keys)
{
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);
SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -448,7 +459,7 @@ namespace
std::shared_ptr<SettingsProfile> parseSettingsProfile(
const Poco::Util::AbstractConfiguration & config,
const String & profile_name,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
auto profile = std::make_shared<SettingsProfile>();
profile->setName(profile_name);
@ -470,13 +481,12 @@ namespace
if (key == "constraints" || key.starts_with("constraints["))
{
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, check_setting_name_function));
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control));
continue;
}
const auto & setting_name = key;
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);
SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -490,7 +500,7 @@ namespace
std::vector<AccessEntityPtr> parseSettingsProfiles(
const Poco::Util::AbstractConfiguration & config,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
Poco::Util::AbstractConfiguration::Keys profile_names;
config.keys("profiles", profile_names);
@ -502,7 +512,7 @@ namespace
{
try
{
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
profiles.push_back(parseSettingsProfile(config, profile_name, access_control));
}
catch (Exception & e)
{
@ -515,13 +525,8 @@ namespace
}
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: UsersConfigAccessStorage(STORAGE_TYPE, check_setting_name_function_, is_no_password_allowed_function_, is_plaintext_password_allowed_function_)
{
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: IAccessStorage(storage_name_), check_setting_name_function(check_setting_name_function_),is_no_password_allowed_function(is_no_password_allowed_function_), is_plaintext_password_allowed_function(is_plaintext_password_allowed_function_)
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_)
: IAccessStorage(storage_name_), access_control(access_control_)
{
}
@ -563,16 +568,16 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu
{
try
{
bool no_password_allowed = is_no_password_allowed_function();
bool plaintext_password_allowed = is_plaintext_password_allowed_function();
bool no_password_allowed = access_control.isNoPasswordAllowed();
bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed();
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config))
for (const auto & entity : parseRowPolicies(config, access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
for (const auto & entity : parseSettingsProfiles(config, access_control))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
}

View File

@ -12,6 +12,7 @@ namespace Poco::Util
namespace DB
{
class AccessControl;
class ConfigReloader;
/// Implementation of IAccessStorage which loads all from users.xml periodically.
@ -20,13 +21,8 @@ class UsersConfigAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "users.xml";
using CheckSettingNameFunction = std::function<void(const std::string_view &)>;
using IsNoPasswordFunction = std::function<bool()>;
using IsPlaintextPasswordFunction = std::function<bool()>;
UsersConfigAccessStorage(const String & storage_name_ = STORAGE_TYPE, const CheckSettingNameFunction & check_setting_name_function_ = {}, const IsNoPasswordFunction & is_no_password_allowed_function_ ={}, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_ = {}); /// NOLINT
UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_); /// NOLINT
UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_);
~UsersConfigAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -58,10 +54,8 @@ private:
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override;
const AccessControl & access_control;
MemoryAccessStorage memory_storage;
CheckSettingNameFunction check_setting_name_function;
IsNoPasswordFunction is_no_password_allowed_function;
IsPlaintextPasswordFunction is_plaintext_password_allowed_function;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
mutable std::mutex load_mutex;

View File

@ -1,147 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
AggregateFunctionGroupArraySorted;
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
WhichDataType which(argument_types[0]);
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
else
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
}
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
if (params.size() == 1)
{
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
throw Exception(
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
threshold = k;
}
else if (!params.empty())
{
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (argument_types.size() == 2)
{
if (isNumber(argument_types[1]))
{
#define DISPATCH2(A, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
WhichDataType which(argument_types[1]);
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH2(Enum8, Int8)
DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
}
else
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
}
}
else if (argument_types.size() == 1)
{
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
}
else
{
throw Exception(
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
}
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}

View File

@ -1,315 +0,0 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
if constexpr (std::is_same_v<TColumn, StringRef>)
{
if constexpr (is_plain)
{
StringRef str = column->getDataAt(row);
auto ptr = arena->alloc(str.size);
std::copy(str.data, str.data + str.size, ptr);
return StringRef(ptr, str.size);
}
else
{
const char * begin = nullptr;
return column->serializeValueIntoArena(row, *arena, begin);
}
}
else
{
if constexpr (std::is_same_v<TColumn, UInt64>)
return column->getUInt(row);
else
return column->getInt(row);
}
}
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
for (size_t i = 0; i < threshold; i++)
{
results[i] = 0;
}
threshold = std::min(row_end - row_begin, threshold);
size_t current_max = 0;
size_t cur;
size_t z;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
//Starting from the highest values, we look for the first one immediately lower than the given element
for (cur = current_max; cur > 0; cur--)
{
if (data[i] > data[results[cur - 1]])
break;
}
if (cur < threshold)
{
//Move all the higher values 1 position to the right
for (z = std::min(threshold - 1, current_max); z > cur; z--)
results[z] = results[z - 1];
if (current_max < threshold)
++current_max;
//insert element into the given position
results[cur] = i;
}
}
return current_max;
}
template <typename T>
struct SortableItem
{
T a;
size_t b;
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};
template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
std::vector<SortableItem<TColumn>> dataIndexed(row_end);
size_t num_elements_filtered = 0;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
dataIndexed.data()[num_elements_filtered].a = data[i];
dataIndexed.data()[num_elements_filtered].b = i;
num_elements_filtered++;
}
threshold = std::min(num_elements_filtered, threshold);
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
for (size_t i = 0; i < threshold; i++)
{
results[i] = dataIndexed[i].b;
}
return threshold;
}
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
{
if (filter != nullptr)
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results);
}
else
{
if (filter != nullptr)
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results);
}
}
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
using Base = IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>;
UInt64 threshold;
DataTypePtr & input_data_type;
mutable std::mutex mutex;
static void deserializeAndInsert(StringRef str, IColumn & data_to);
public:
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>(argument_types_, params)
, threshold(threshold_)
, input_data_type(this->argument_types[0])
{
}
void create(AggregateDataPtr place) const override
{
Base::create(place);
this->data(place).threshold = threshold;
}
String getName() const override { return "groupArraySorted"; }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
bool allocatesMemoryInArena() const override
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
return true;
else
return false;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
data.add(
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
}
else
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
}
}
template <typename TColumn, bool is_plain, typename TFunc>
void
forFirstRows(size_t row_begin, size_t row_end, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
{
const TColumn * values = nullptr;
std::unique_ptr<std::vector<TColumn>> values_vector;
std::vector<size_t> best_rows(threshold);
if constexpr (std::is_same_v<TColumn, StringRef>)
{
values_vector.reset(new std::vector<TColumn>(row_end));
for (size_t i = row_begin; i < row_end; i++)
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
values = (*values_vector).data();
}
else
{
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
values = column.getData().data();
}
const UInt8 * filter = nullptr;
StringRef refFilter;
if (if_argument_pos >= 0)
{
refFilter = columns[if_argument_pos]->getRawData();
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
}
size_t num_elements = getFirstNElements(values, row_begin, row_end, threshold, best_rows.data(), filter);
for (size_t i = 0; i < num_elements; i++)
{
func(best_rows[i], values);
}
}
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
forFirstRows<TColumnB, is_plain_b>(
row_begin, row_end, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
});
}
else
{
forFirstRows<TColumnA, is_plain_a>(
row_begin, row_end, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
{
data.add(values[row]);
});
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).deserialize(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
auto & values = this->data(place).values;
offsets_to.push_back(offsets_to.back() + values.size());
IColumn & data_to = arr_to.getData();
for (auto value : values)
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
{
auto str = State::itemValue(value);
if constexpr (is_plain_a)
{
data_to.insertData(str.data, str.size);
}
else
{
data_to.deserializeAndInsertFromArena(str.data);
}
}
else
{
data_to.insert(State::itemValue(value));
}
}
}
};
}
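
The removed header above selects the first N rows of a block in two ways: a small insertion-style scan for low thresholds and, for large thresholds, an indexed partial sort. Below is a minimal standalone sketch of the second path, using std::nth_element followed by a sort of only the selected prefix; the function name and the sample data are illustrative, not taken from the ClickHouse sources.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

/// Return the indices of the `threshold` smallest elements of `data`, in ascending order of value.
/// Partial selection first (std::nth_element), then a full sort of only the selected prefix.
std::vector<size_t> firstNElements(const std::vector<int> & data, size_t threshold)
{
    std::vector<std::pair<int, size_t>> indexed(data.size());
    for (size_t i = 0; i < data.size(); ++i)
        indexed[i] = {data[i], i};

    threshold = std::min(threshold, indexed.size());
    std::nth_element(indexed.begin(), indexed.begin() + threshold, indexed.end());
    std::sort(indexed.begin(), indexed.begin() + threshold);

    std::vector<size_t> result(threshold);
    for (size_t i = 0; i < threshold; ++i)
        result[i] = indexed[i].second;
    return result;
}

int main()
{
    std::vector<int> values{42, 7, 19, 3, 25, 11};
    for (size_t row : firstNElements(values, 3))
        std::cout << row << " -> " << values[row] << "\n"; /// rows holding 3, 7 and 11
}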

View File

@ -1,162 +0,0 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
writeVarInt(item, buf);
}
else
{
writeVarUInt(item, buf);
}
}
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
writeBinary(item, buf);
}
template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
DB::Int64 val;
readVarT(val, buf);
item = val;
}
else
{
DB::UInt64 val;
readVarT(val, buf);
item = val;
}
}
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
item = readStringBinaryInto(*arena, buf);
}
template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
typedef typename Storage::value_type ValueType;
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
inline void narrowDown()
{
while (values.size() > threshold)
values.erase(--values.end());
}
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
{
values.merge(Storage(other.values));
narrowDown();
}
void serialize(WriteBuffer & buf) const
{
writeOneItem(buf, UInt64(values.size()));
for (auto value : values)
{
serializeItem(buf, value);
}
}
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
void deserialize(ReadBuffer & buf, Arena * arena)
{
values.clear();
UInt64 length;
readOneItem(buf, nullptr, length);
while (length--)
{
values.insert(deserializeItem(buf, arena));
}
narrowDown();
}
UInt64 threshold;
Storage values;
};
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
using Base::Base;
void add(T item, TIndex weight)
{
Base::values.insert({weight, item});
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
{
writeOneItem(buf, value.first);
writeOneItem(buf, value.second);
}
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
TIndex first;
T second;
readOneItem(buf, arena, first);
readOneItem(buf, arena, second);
return {first, second};
}
static T itemValue(typename Base::ValueType & value) { return value.second; }
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
using Base::Base;
void add(T item)
{
Base::values.insert(item);
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
T value;
readOneItem(buf, arena, value);
return value;
}
static T itemValue(typename Base::ValueType & value) { return value; }
};
}
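
The data structures removed above keep the accumulated values in an ordered container and trim it back to the threshold after every add or merge, so memory stays bounded by the requested array size. A small sketch of that bounded-multiset idea follows; the struct name and the threshold used below are illustrative only.

#include <cstddef>
#include <iostream>
#include <iterator>
#include <set>

/// Keep at most `threshold` smallest values; once the limit is exceeded the current maximum is dropped.
struct BoundedSortedValues
{
    size_t threshold;
    std::multiset<int> values;

    void add(int value)
    {
        values.insert(value);
        narrowDown();
    }

    void narrowDown()
    {
        while (values.size() > threshold)
            values.erase(std::prev(values.end()));
    }
};

int main()
{
    BoundedSortedValues top3{3, {}};
    for (int v : {8, 1, 9, 4, 2})
        top3.add(v);
    for (int v : top3.values)
        std::cout << v << ' '; /// prints "1 2 4"
    std::cout << '\n';
}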

View File

@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -131,7 +130,6 @@ void registerAggregateFunctions()
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerWindowFunctions(factory);
}

View File

@ -2,7 +2,6 @@
#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <map>
#include <unordered_map>
@ -392,7 +391,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
/// The header block containing zero rows was used to initialize
/// output_format, do not output it.
@ -439,14 +438,14 @@ void ClientBase::onLogData(Block & block)
void ClientBase::onTotals(Block & block, ASTPtr parsed_query)
{
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
output_format->setTotals(block);
}
void ClientBase::onExtremes(Block & block, ASTPtr parsed_query)
{
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
output_format->setExtremes(block);
}
@ -466,7 +465,7 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
}
void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
if (!output_format)
@ -1489,7 +1488,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
std::cout << std::endl
<< processed_rows << " row" << (processed_rows == 1 ? "" : "s")
<< " in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
progress_indication.writeFinalProgress();
std::cout << std::endl << std::endl;
}
@ -2059,156 +2060,6 @@ void ClientBase::showClientVersion()
}
void ClientBase::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
/// Parameter arg after underscore.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
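/// The readArguments code removed above pre-splits argv into common options, per-"--external" table groups,
/// "--param_<name>" query parameters and host/port pairs before boost::program_options runs.
/// Below is a minimal standalone sketch of just the query-parameter handling, supporting both the
/// "--param_x=1" and "--param_x 1" spellings; the helper name and its error behaviour are assumptions
/// made for illustration, not part of the ClickHouse code.

#include <cstring>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>

/// Collect "--param_<name>=<value>" and "--param_<name> <value>" pairs from argv (hypothetical helper).
std::map<std::string, std::string> collectQueryParameters(int argc, char ** argv)
{
    std::map<std::string, std::string> params;
    for (int arg_num = 1; arg_num < argc; ++arg_num)
    {
        std::string_view arg = argv[arg_num];
        if (!arg.starts_with("--param_"))
            continue;

        std::string_view rest = arg.substr(strlen("--param_"));
        auto equal_pos = rest.find('=');
        if (equal_pos == std::string_view::npos)
        {
            /// "--param_name value": the value is the next argument.
            if (++arg_num >= argc)
                throw std::invalid_argument("Parameter requires value");
            params.emplace(std::string(rest), argv[arg_num]);
        }
        else
        {
            if (equal_pos == 0)
                throw std::invalid_argument("Parameter name cannot be empty");
            params.emplace(std::string(rest.substr(0, equal_pos)), std::string(rest.substr(equal_pos + 1)));
        }
    }
    return params;
}

int main(int argc, char ** argv)
{
    /// e.g. ./a.out --param_id=42 --param_name Alice  ->  id = 42, name = Alice
    for (const auto & [name, value] : collectQueryParameters(argc, argv))
        std::cout << name << " = " << value << "\n";
}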
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)

View File

@ -106,6 +106,14 @@ protected:
bool processQueryText(const String & text);
virtual void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) = 0;
private:
void receiveResult(ASTPtr parsed_query);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -131,19 +139,13 @@ private:
void sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query);
void sendExternalTables(ASTPtr parsed_query);
void initBlockOutputStream(const Block & block, ASTPtr parsed_query);
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();
String prompt() const;
void resetOutput();
void outputQueryInfo(bool echo_query_);
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments);
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
void updateSuggest(const ASTPtr & ast);

View File

@ -90,7 +90,7 @@ void LRUFileCache::initialize()
}
void LRUFileCache::useCell(
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & /* cache_lock */)
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock)
{
auto file_segment = cell.file_segment;
@ -109,7 +109,7 @@ void LRUFileCache::useCell(
if (cell.queue_iterator)
{
/// Move to the end of the queue. The iterator remains valid.
queue.splice(queue.end(), queue, *cell.queue_iterator);
queue.moveToEnd(*cell.queue_iterator, cache_lock);
}
}
@ -237,7 +237,11 @@ FileSegments LRUFileCache::splitRangeIntoCells(
}
void LRUFileCache::fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
FileSegments & file_segments,
const Key & key,
const FileSegment::Range & range,
bool fill_with_detached_file_segments,
std::lock_guard<std::mutex> & cache_lock)
{
/// There are segments [segment1, ..., segmentN]
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
@ -319,7 +323,8 @@ void LRUFileCache::fillHolesWithEmptyFileSegments(
}
else
{
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
file_segments.splice(
file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
}
}
}
@ -397,10 +402,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock));
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
FileSegmentCell cell(std::move(file_segment), queue);
FileSegmentCell cell(std::move(file_segment), this, cache_lock);
auto & offsets = files[key];
@ -425,6 +430,10 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
{
std::lock_guard cache_lock(mutex);
#ifndef NDEBUG
assertCacheCorrectness(key, cache_lock);
#endif
auto * cell = getCell(key, offset, cache_lock);
if (cell)
throw Exception(
@ -437,15 +446,15 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
}
bool LRUFileCache::tryReserve(
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
auto removed_size = 0;
size_t queue_size = queue.size();
size_t queue_size = queue.getElementsNum(cache_lock);
assert(queue_size <= max_element_size);
/// Since space reservation is incremental, a cache cell already exists if its state is EMPTY.
/// And a cache cell does not exist on startup -- we first check for space and then add a cell.
auto * cell_for_reserve = getCell(key_, offset_, cache_lock);
auto * cell_for_reserve = getCell(key, offset, cache_lock);
/// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
/// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
@ -455,24 +464,27 @@ bool LRUFileCache::tryReserve(
auto is_overflow = [&]
{
/// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
return (max_size != 0 && current_size + size - removed_size > max_size)
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size);
};
std::vector<FileSegmentCell *> to_evict;
std::vector<FileSegmentCell *> trash;
auto key_it = queue.begin();
while (is_overflow() && key_it != queue.end())
for (const auto & [entry_key, entry_offset, entry_size] : queue)
{
const auto [key, offset] = *key_it;
++key_it;
if (!is_overflow())
break;
auto * cell = getCell(key, offset, cache_lock);
auto * cell = getCell(entry_key, entry_offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}",
keyToStr(key), offset);
size_t cell_size = cell->size();
assert(entry_size == cell_size);
/// It is guaranteed that cell is not removed from cache as long as
/// pointer to corresponding file segment is hold by any other thread.
@ -495,7 +507,7 @@ bool LRUFileCache::tryReserve(
}
default:
{
remove(key, offset, cache_lock, segment_lock);
trash.push_back(cell);
break;
}
}
@ -505,11 +517,35 @@ bool LRUFileCache::tryReserve(
}
}
/// This case is very unlikely; it can happen in case of an exception from
/// file_segment->complete(), which would be a logical error.
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
}
if (is_overflow())
return false;
if (cell_for_reserve && !cell_for_reserve->queue_iterator)
cell_for_reserve->queue_iterator = queue.insert(queue.end(), std::make_pair(key_, offset_));
/// cache cell is nullptr on server startup because we first check for space and then add a cell.
if (cell_for_reserve)
{
/// queue_iterator is std::nullopt here if no space has been reserved yet; a cache cell
/// acquires a queue iterator on the first successful space reservation attempt.
/// If queue iterator already exists, we need to update the size after each space reservation.
auto queue_iterator = cell_for_reserve->queue_iterator;
if (queue_iterator)
queue.incrementSize(*queue_iterator, size, cache_lock);
else
cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
}
for (auto & cell : to_evict)
{
@ -521,8 +557,7 @@ bool LRUFileCache::tryReserve(
}
}
current_size += size - removed_size;
if (current_size > (1ull << 63))
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
return true;
@ -549,7 +584,10 @@ void LRUFileCache::remove(const Key & key)
for (auto & cell : to_remove)
{
if (!cell->releasable())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove file from cache because someone reads from it. File segment info: {}", cell->file_segment->getInfoForLog());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot remove file from cache because someone reads from it. File segment info: {}",
cell->file_segment->getInfoForLog());
auto file_segment = cell->file_segment;
if (file_segment)
@ -565,6 +603,10 @@ void LRUFileCache::remove(const Key & key)
if (fs::exists(key_path))
fs::remove(key_path);
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::remove(bool force_remove_unreleasable)
@ -574,20 +616,22 @@ void LRUFileCache::remove(bool force_remove_unreleasable)
std::lock_guard cache_lock(mutex);
std::vector<FileSegment *> to_remove;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset] = *it++;
const auto & [key, offset, size] = *it++;
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell");
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell");
if (cell->releasable() || force_remove_unreleasable)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
std::lock_guard segment_lock(file_segment->mutex);
file_segment->detach(cache_lock, segment_lock);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
@ -606,7 +650,9 @@ void LRUFileCache::remove(
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);
if (cell->queue_iterator)
queue.erase(*cell->queue_iterator);
{
queue.remove(*cell->queue_iterator, cache_lock);
}
auto & offsets = files[key];
offsets.erase(offset);
@ -642,7 +688,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
Key key;
UInt64 offset = 0;
size_t size = 0;
std::vector<std::pair<LRUQueueIterator, std::weak_ptr<FileSegment>>> queue_entries;
std::vector<std::pair<LRUQueue::Iterator, std::weak_ptr<FileSegment>>> queue_entries;
/// cache_base_path / key_prefix / key / offset
@ -681,7 +727,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
{
LOG_WARNING(log,
"Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
max_size, availableSize(), key_it->path().string(), size);
max_size, getAvailableCacheSizeUnlocked(cache_lock), key_it->path().string(), size);
fs::remove(offset_it->path());
}
}
@ -699,47 +745,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
if (file_segment.expired())
continue;
queue.splice(queue.end(), queue, it);
queue.moveToEnd(it, cache_lock);
}
}
LRUFileCache::Stat LRUFileCache::getStat()
{
std::lock_guard cache_lock(mutex);
Stat stat
{
.size = queue.size(),
.available = availableSize(),
.downloaded_size = 0,
.downloading_size = 0,
};
for (const auto & [key, offset] : queue)
{
const auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
switch (cell->file_segment->download_state)
{
case FileSegment::State::DOWNLOADED:
{
++stat.downloaded_size;
break;
}
case FileSegment::State::DOWNLOADING:
{
++stat.downloading_size;
break;
}
default:
break;
}
}
return stat;
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::reduceSizeToDownloaded(
@ -754,14 +764,23 @@ void LRUFileCache::reduceSizeToDownloaded(
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cell found for key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"No cell found for key: {}, offset: {}",
keyToStr(key), offset);
}
const auto & file_segment = cell->file_segment;
size_t downloaded_size = file_segment->downloaded_size;
if (downloaded_size == file_segment->range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}",
keyToStr(key), offset);
}
cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
}
@ -814,16 +833,40 @@ std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
size_t LRUFileCache::getUsedCacheSize() const
{
std::lock_guard cache_lock(mutex);
return current_size;
return getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getCacheFilesNum() const
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getTotalWeight(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSize() const
{
std::lock_guard cache_lock(mutex);
return files.size();
return getAvailableCacheSizeUnlocked(cache_lock);
}
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
size_t LRUFileCache::getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return max_size - getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNum() const
{
std::lock_guard cache_lock(mutex);
return getFileSegmentsNumUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getElementsNum(cache_lock);
}
LRUFileCache::FileSegmentCell::FileSegmentCell(
FileSegmentPtr file_segment_,
LRUFileCache * cache,
std::lock_guard<std::mutex> & cache_lock)
: file_segment(file_segment_)
{
/**
@ -836,7 +879,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
{
case FileSegment::State::DOWNLOADED:
{
queue_iterator = queue_.insert(queue_.end(), getKeyAndOffset());
queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock);
break;
}
case FileSegment::State::EMPTY:
@ -851,13 +894,97 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
}
}
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
{
#ifndef NDEBUG
for (const auto & [entry_key, entry_offset, _] : queue)
{
if (entry_key == key && entry_offset == offset)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
keyToStr(key), offset, size);
}
#endif
cache_size += size;
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
}
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size -= queue_it->size;
queue.erase(queue_it);
}
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
queue.splice(queue.end(), queue, queue_it);
}
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size += size_increment;
queue_it->size += size_increment;
}
bool LRUFileCache::LRUQueue::contains(
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
{
/// This method is used for assertions in debug mode.
/// So we do not care about complexity here.
for (const auto & [entry_key, entry_offset, size] : queue)
{
if (key == entry_key && offset == entry_offset)
return true;
}
return false;
}
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
{
[[maybe_unused]] size_t total_size = 0;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset, size] = *it++;
auto * cell = cache->getCell(key, offset, cache_lock);
if (!cell)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
}
assert(cell->size() == size);
total_size += size;
}
assert(total_size == cache_size);
assert(cache_size <= cache->max_size);
assert(queue.size() <= cache->max_element_size);
}
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
{
String result;
for (const auto & [key, offset, size] : queue)
{
if (!result.empty())
result += ", ";
result += fmt::format("{}: [{}, {}]", keyToStr(key), offset, offset + size - 1);
}
return result;
}
String LRUFileCache::dumpStructure(const Key & key)
{
std::lock_guard cache_lock(mutex);
return dumpStructureImpl(key, cache_lock);
return dumpStructureUnlocked(key, cache_lock);
}
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
String LRUFileCache::dumpStructureUnlocked(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
WriteBufferFromOwnString result;
const auto & cells_by_offset = files[key];
@ -865,18 +992,37 @@ String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mut
for (const auto & [offset, cell] : cells_by_offset)
result << cell.file_segment->getInfoForLog() << "\n";
result << "\n\nQueue: " << queue.toString(cache_lock);
return result.str();
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
void LRUFileCache::assertCacheCellsCorrectness(
const FileSegmentsByOffset & cells_by_offset, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
const auto & cells_by_offset = files[key];
for (const auto & [_, cell] : cells_by_offset)
{
const auto & file_segment = cell.file_segment;
file_segment->assertCorrectness();
if (file_segment->reserved_size != 0)
{
assert(cell.queue_iterator);
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
}
}
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
{
for (const auto & [key, cells_by_offset] : files)
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
}
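
The new LRUQueue used throughout this file wraps the former std::list of (key, offset) pairs so that each entry also carries its size: the total cache weight is then available in O(1) and a queue entry can grow while space is reserved incrementally. A condensed standalone sketch of the same idea follows; the string keys and the sizes used in main are invented for illustration.

#include <cstddef>
#include <iostream>
#include <list>
#include <string>

/// LRU queue whose entries carry a size; the total weight is maintained incrementally.
class WeightedLRUQueue
{
public:
    struct Entry { std::string key; size_t size; };
    using Iterator = std::list<Entry>::iterator;

    Iterator add(std::string key, size_t size)
    {
        total_weight += size;
        return queue.insert(queue.end(), Entry{std::move(key), size});
    }

    void remove(Iterator it)
    {
        total_weight -= it->size;
        queue.erase(it);
    }

    void moveToEnd(Iterator it) { queue.splice(queue.end(), queue, it); } /// mark as most recently used

    void incrementSize(Iterator it, size_t size_increment)
    {
        it->size += size_increment;
        total_weight += size_increment;
    }

    size_t getTotalWeight() const { return total_weight; }
    size_t getElementsNum() const { return queue.size(); }
    const Entry & front() const { return queue.front(); } /// eviction candidate

private:
    std::list<Entry> queue;
    size_t total_weight = 0;
};

int main()
{
    WeightedLRUQueue queue;
    auto a = queue.add("a", 10);
    auto b = queue.add("b", 5);
    queue.incrementSize(b, 5);   /// incremental reservation grows an existing entry
    queue.moveToEnd(a);          /// "a" was just used, so "b" becomes the eviction candidate
    std::cout << queue.getTotalWeight() << " bytes in " << queue.getElementsNum()
              << " entries, evict first: " << queue.front().key << "\n"; /// 20 bytes, evict "b"
}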

View File

@ -92,7 +92,7 @@ public:
virtual size_t getUsedCacheSize() const = 0;
virtual size_t getCacheFilesNum() const = 0;
virtual size_t getFileSegmentsNum() const = 0;
protected:
String cache_base_path;
@ -155,19 +155,57 @@ public:
size_t getUsedCacheSize() const override;
size_t getCacheFilesNum() const override;
size_t getFileSegmentsNum() const override;
private:
using FileKeyAndOffset = std::pair<Key, size_t>;
using LRUQueue = std::list<FileKeyAndOffset>;
using LRUQueueIterator = typename LRUQueue::iterator;
class LRUQueue
{
public:
struct FileKeyAndOffset
{
Key key;
size_t offset;
size_t size;
FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {}
};
using Iterator = typename std::list<FileKeyAndOffset>::iterator;
size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }
Iterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
void remove(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
void moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
/// Space reservation for a file segment is incremental, so we need to be able to increment size of the queue entry.
void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);
void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
String toString(std::lock_guard<std::mutex> & cache_lock) const;
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;
Iterator begin() { return queue.begin(); }
Iterator end() { return queue.end(); }
private:
std::list<FileKeyAndOffset> queue;
size_t cache_size = 0;
};
struct FileSegmentCell : private boost::noncopyable
{
FileSegmentPtr file_segment;
/// Iterator is put here on first reservation attempt, if successful.
std::optional<LRUQueueIterator> queue_iterator;
std::optional<LRUQueue::Iterator> queue_iterator;
/// Pointer to the file segment is always held by the cache itself.
/// Apart from the pointer in the cache, it can be held by cache users when they call
@ -176,13 +214,11 @@ private:
size_t size() const { return file_segment->reserved_size; }
FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_);
FileSegmentCell(FileSegmentPtr file_segment_, LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
FileSegmentCell(FileSegmentCell && other) noexcept
: file_segment(std::move(other.file_segment))
, queue_iterator(other.queue_iterator) {}
std::pair<Key, size_t> getKeyAndOffset() const { return std::make_pair(file_segment->key(), file_segment->range().left); }
};
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
@ -190,7 +226,6 @@ private:
CachedFiles files;
LRUQueue queue;
size_t current_size = 0;
Poco::Logger * log;
FileSegments getImpl(
@ -225,31 +260,32 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) override;
size_t availableSize() const { return max_size - current_size; }
size_t getAvailableCacheSize() const;
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
FileSegments splitRangeIntoCells(
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
void fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
public:
struct Stat
{
size_t size;
size_t available;
size_t downloaded_size;
size_t downloading_size;
};
Stat getStat();
String dumpStructure(const Key & key_) override;
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
};
}
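
fillHolesWithEmptyFileSegments, declared in this header, walks the ascending, non-overlapping cached segments and creates empty placeholder segments for every part of the requested range that is not covered yet. A small standalone sketch of that gap computation over plain inclusive offset ranges follows; the intervals used in main are made up for illustration.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct Range { size_t left; size_t right; }; /// inclusive byte range, like FileSegment::Range

/// Return the holes inside `requested` that are not covered by `cached`
/// (cached ranges are non-overlapping, non-empty and ascending, like the cache cells).
std::vector<Range> findHoles(const std::vector<Range> & cached, Range requested)
{
    std::vector<Range> holes;
    size_t current_pos = requested.left;

    for (const auto & segment : cached)
    {
        if (current_pos > requested.right)
            break;
        if (segment.left > current_pos)
            holes.push_back({current_pos, std::min(segment.left - 1, requested.right)});
        current_pos = std::max(current_pos, segment.right + 1);
    }

    if (current_pos <= requested.right)
        holes.push_back({current_pos, requested.right});
    return holes;
}

int main()
{
    std::vector<Range> cached{{0, 9}, {17, 20}, {24, 26}};
    for (const auto & hole : findHoles(cached, {5, 27}))
        std::cout << '[' << hole.left << ", " << hole.right << "] ";
    std::cout << '\n'; /// prints [10, 16] [21, 23] [27, 27]
}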

View File

@ -94,11 +94,6 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
}
String FileSegment::getCallerId()
{
return getCallerIdImpl();
}
String FileSegment::getCallerIdImpl()
{
if (!CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
@ -400,7 +395,10 @@ bool FileSegment::reserve(size_t size)
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
if (reserved)
{
std::lock_guard segment_lock(mutex);
reserved_size += size;
}
return reserved;
}
@ -606,6 +604,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
info << "File segment: " << range().toString() << ", ";
info << "state: " << download_state << ", ";
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
info << "reserved size: " << reserved_size << ", ";
info << "downloader id: " << downloader_id << ", ";
info << "caller id: " << getCallerId();

View File

@ -184,8 +184,6 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock);
static String getCallerIdImpl();
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
const Range segment_range;

View File

@ -14,7 +14,7 @@
* make a persistent copy of the key in each of the following cases:
* 1) the aggregation method doesn't use temporary keys, so they're persistent
* from the start;
* 1) the key is already present in the hash table;
* 2) the key is already present in the hash table;
* 3) that particular key is stored by value, e.g. a short StringRef key in
* StringHashMap.
*

View File

@ -83,7 +83,7 @@ public:
current_word = 0;
}
void update(const char * data, UInt64 size)
ALWAYS_INLINE void update(const char * data, UInt64 size)
{
const char * end = data + size;
@ -137,12 +137,12 @@ public:
}
template <typename T>
void update(const T & x)
ALWAYS_INLINE void update(const T & x)
{
update(reinterpret_cast<const char *>(&x), sizeof(x)); /// NOLINT
}
void update(const std::string & x)
ALWAYS_INLINE void update(const std::string & x)
{
update(x.data(), x.length());
}

View File

@ -181,7 +181,7 @@ ThreadStatus::~ThreadStatus()
deleter();
/// Only change current_thread if it's currently being used by this ThreadStatus
/// For example, PushingToViewsBlockOutputStream creates and deletes ThreadStatus instances while running in the main query thread
/// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread
if (current_thread == this)
current_thread = nullptr;
}

View File

@ -135,6 +135,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________]
/// ^ ^
/// 0 9
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
ASSERT_EQ(cache.getUsedCacheSize(), 10);
{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
@ -154,6 +156,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________][_____]
/// ^ ^^ ^
/// 0 910 14
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
{
auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
@ -179,12 +183,15 @@ TEST(LRUFileCache, get)
complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
// complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
/// Current cache: [__________][_____] [____] [___][]
/// ^ ^^ ^ ^ ^ ^ ^^^
/// 0 910 14 17 20 24 2627
///
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
ASSERT_EQ(cache.getUsedCacheSize(), 22);
{
auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
@ -249,7 +256,7 @@ TEST(LRUFileCache, get)
/// ^ ^ ^ ^ ^
/// 10 17 21 24 26
ASSERT_EQ(cache.getStat().size, 5);
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
{
auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
@ -479,8 +486,6 @@ TEST(LRUFileCache, get)
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
cache2.initialize();
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 5);

View File

@ -46,7 +46,7 @@ struct BlockInfo
void read(ReadBuffer & in);
};
/// Block extension to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
/// Block extension to support delayed defaults. AddingDefaultsTransform uses it to replace missing values with column defaults.
class BlockMissingValues
{
public:

View File

@ -22,6 +22,10 @@ namespace DB
{
class IColumn;
static constexpr UInt64 operator""_Gb(unsigned long long value)
{
return value * 1024 * 1024 * 1024;
}
/** List of settings: type, name, default value, description, flags
*
@ -340,7 +344,7 @@ class IColumn;
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
@ -356,9 +360,9 @@ class IColumn;
M(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
\
M(UInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage, 0, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage, 10_Gb, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage_for_user, 0, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage_for_user, 10_Gb, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \

View File

@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
{{"auto", JoinAlgorithm::AUTO},
{"hash", JoinAlgorithm::HASH},
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}})
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,

View File

@ -42,6 +42,7 @@ enum class JoinAlgorithm
HASH,
PARTIAL_MERGE,
PREFER_PARTIAL_MERGE,
PARALLEL_HASH,
};
DECLARE_SETTING_ENUM(JoinAlgorithm)

View File

@ -26,7 +26,7 @@ namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int LOGICAL_ERROR;
extern const int DUPLICATE_COLUMN;
extern const int INCOMPATIBLE_COLUMNS;
}
size_t getNumberOfDimensions(const IDataType & type)
@ -183,6 +183,20 @@ static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts &
return true;
}
/// Returns true if there exists a prefix with matched names,
/// but with mismatched structure (whether a part is Nested, number of dimensions).
static bool hasDifferentStructureInPrefix(const PathInData::Parts & lhs, const PathInData::Parts & rhs)
{
for (size_t i = 0; i < std::min(lhs.size(), rhs.size()); ++i)
{
if (lhs[i].key != rhs[i].key)
return false;
else if (lhs[i] != rhs[i])
return true;
}
return false;
}
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
{
size_t size = paths.size();
@ -192,9 +206,15 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
{
if (isPrefix(paths[i].getParts(), paths[j].getParts())
|| isPrefix(paths[j].getParts(), paths[i].getParts()))
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
"Data in Object has ambiguous paths: '{}' and '{}'",
paths[i].getPath(), paths[j].getPath());
if (hasDifferentStructureInPrefix(paths[i].getParts(), paths[j].getParts()))
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
"Data in Object has ambiguous paths: '{}' and '{}'. "
"Paths have prefixes matched by names, but different in structure",
paths[i].getPath(), paths[j].getPath());
}
}
}

View File

@ -213,11 +213,14 @@ bool JSONDataParser<ParserImpl>::tryInsertDefaultFromNested(
{
/// If there is a collected size of current Nested
/// then insert array of this size as a default value.
if (path.empty())
if (path.empty() || array.empty())
return false;
/// Last element is not Null, because otherwise this path wouldn't exist.
auto nested_key = getNameOfNested(path, array.back());
if (nested_key.empty())
return false;
StringRef nested_key{path[0].key};
auto * mapped = ctx.nested_sizes_by_key.find(nested_key);
if (!mapped)
return false;
@ -253,7 +256,18 @@ StringRef JSONDataParser<ParserImpl>::getNameOfNested(const PathInData::Parts &
if (value.getType() != Field::Types::Array || path.empty())
return {};
return StringRef{path[0].key};
/// Find the first key that is marked as Nested,
/// because we may have a tuple of Nested and there could be
/// several arrays with the same prefix but with independent sizes.
/// Consider an array element of type `k2 Tuple(k3 Nested(...), k5 Nested(...))`.
/// Then subcolumns `k2.k3` and `k2.k5` may have independent sizes, so we should extract
/// the `k3` and `k5` keys instead of `k2`.
for (const auto & part : path)
if (part.is_nested)
return StringRef{part.key};
return {};
}
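A simplified, standalone sketch of the key-selection rule described in the comment above (the Part struct and names here are illustrative assumptions, not the real PathInData API):
#include <iostream>
#include <string>
#include <vector>
struct Part { std::string key; bool is_nested; };
// Mirrors getNameOfNested: return the first path part marked as Nested, or empty if none.
std::string nameOfNested(const std::vector<Part> & path)
{
    for (const auto & part : path)
        if (part.is_nested)
            return part.key;
    return {};
}
int main()
{
    // For `k2 Tuple(k3 Nested(...), k5 Nested(...))`, the path k2.k3 marks k3 as Nested,
    // so "k3" is returned rather than the top-level "k2", keeping sizes independent per Nested.
    std::vector<Part> path{{"k2", false}, {"k3", true}};
    std::cout << nameOfNested(path) << '\n'; // prints: k3
}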
#if USE_SIMDJSON

View File

@ -60,7 +60,7 @@ public:
* 1. \N
* 2. empty string (without quotes)
* 3. NULL
* We support all of them (however, second variant is supported by CSVRowInputStream, not by deserializeTextCSV).
* We support all of them (however, second variant is supported by CSVRowInputFormat, not by deserializeTextCSV).
* (see also input_format_defaults_for_omitted_fields and input_format_csv_unquoted_null_literal_as_null settings)
* In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
*/

View File

@ -104,15 +104,17 @@ bool DatabaseReplicatedDDLWorker::waitForReplicaToProcessAllEntries(UInt64 timeo
auto max_log = DDLTask::getLogEntryName(max_log_ptr);
LOG_TRACE(log, "Waiting for worker thread to process all entries before {}, current task is {}", max_log, current_task);
std::unique_lock lock{mutex};
bool processed = wait_current_task_change.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&]()
{
assert(zookeeper->expired() || current_task <= max_log);
return zookeeper->expired() || current_task == max_log || stop_flag;
});
std::unique_lock lock{mutex};
bool processed = wait_current_task_change.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&]()
{
assert(zookeeper->expired() || current_task <= max_log);
return zookeeper->expired() || current_task == max_log || stop_flag;
});
if (!processed)
return false;
if (!processed)
return false;
}
LOG_TRACE(log, "Waiting for worker thread to process all entries before {}, current task is {}", max_log, current_task);

View File

@ -217,7 +217,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
}
else
{
/// We pass empty block to RemoteBlockInputStream, because we don't know the structure of the result.
/// We pass empty block to RemoteQueryExecutor, because we don't know the structure of the result.
Block invalidate_sample_block;
QueryPipeline pipeline(std::make_shared<RemoteSource>(
std::make_shared<RemoteQueryExecutor>(pool, request, invalidate_sample_block, context_copy), false, false));

View File

@ -67,7 +67,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
block_key_columns.reserve(dictionary_keys_size);
QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
QueryPipeline pipeline(getSourcePipe(key_columns, requested_keys));
PullingPipelineExecutor executor(pipeline);
@ -185,7 +185,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
block_key_columns.reserve(dictionary_keys_size);
QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
QueryPipeline pipeline(getSourcePipe(key_columns, requested_keys));
PullingPipelineExecutor executor(pipeline);
size_t keys_found = 0;
@ -259,7 +259,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::isInHierarchy(
}
template <DictionaryKeyType dictionary_key_type>
Pipe DirectDictionary<dictionary_key_type>::getSourceBlockInputStream(
Pipe DirectDictionary<dictionary_key_type>::getSourcePipe(
const Columns & key_columns [[maybe_unused]],
const PaddedPODArray<KeyType> & requested_keys [[maybe_unused]]) const
{

View File

@ -96,7 +96,7 @@ public:
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
Pipe getSourceBlockInputStream(const Columns & key_columns, const PaddedPODArray<KeyType> & requested_keys) const;
Pipe getSourcePipe(const Columns & key_columns, const PaddedPODArray<KeyType> & requested_keys) const;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -697,7 +697,6 @@ void IPAddressDictionary::getItemsImpl(
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
// special case for getBlockInputStream
if (unlikely(key_columns.size() == 2))
{
getItemsByTwoKeyColumnsImpl<AttributeType>(

View File

@ -3,6 +3,7 @@
#include <Common/logger_useful.h>
#include <Common/escapeForFileName.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/SeekAvoidingReadBuffer.h>
#include <IO/ReadHelpers.h>

View File

@ -62,7 +62,7 @@ public:
/** Reads data from the buffer quickly and saves the result to memory.
* Reads at least min_chunk_bytes and a bit more, up to the end of the chunk, depending on the format.
* Used in ParallelParsingBlockInputStream.
* Used in ParallelParsingInputFormat.
*/
using FileSegmentationEngine = std::function<std::pair<bool, size_t>(
ReadBuffer & buf,

View File

@ -44,7 +44,7 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_,
{
istr_concrete = typeid_cast<CompressedReadBufferFromFile *>(&istr);
if (!istr_concrete)
throw Exception("When need to use index for NativeBlockInputStream, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR);
throw Exception("When need to use index for NativeReader, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR);
if (index_block_it == index_block_end)
return;
@ -80,7 +80,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co
if (column->size() != rows)
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
"Cannot read all data in NativeBlockInputStream. Rows read: {}. Rows expected: {}", column->size(), rows);
"Cannot read all data in NativeReader. Rows read: {}. Rows expected: {}", column->size(), rows);
}

View File

@ -35,7 +35,7 @@ NativeWriter::NativeWriter(
{
ostr_concrete = typeid_cast<CompressedWriteBuffer *>(&ostr);
if (!ostr_concrete)
throw Exception("When need to write index for NativeBlockOutputStream, ostr must be CompressedWriteBuffer.", ErrorCodes::LOGICAL_ERROR);
throw Exception("When need to write index for NativeWriter, ostr must be CompressedWriteBuffer.", ErrorCodes::LOGICAL_ERROR);
}
}

View File

@ -417,17 +417,17 @@ struct TimeWindowImpl<HOP>
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
wstart = AddTime<kind>::execute(wend, -1 * window_num_units, time_zone);
wstart = AddTime<kind>::execute(wend, -window_num_units, time_zone);
ToType wend_latest;
do
{
wend_latest = wend;
wend = AddTime<kind>::execute(wend, -1 * hop_num_units, time_zone);
wend = AddTime<kind>::execute(wend, -hop_num_units, time_zone);
} while (wend > time_data[i]);
end_data[i] = wend_latest;
start_data[i] = AddTime<kind>::execute(wend_latest, -1 * window_num_units, time_zone);
start_data[i] = AddTime<kind>::execute(wend_latest, -window_num_units, time_zone);
}
MutableColumns result;
result.emplace_back(std::move(start));
@ -570,7 +570,7 @@ struct TimeWindowImpl<WINDOW_ID>
do
{
wend_latest = wend;
wend = AddTime<kind>::execute(wend, -1 * gcd_num_units, time_zone);
wend = AddTime<kind>::execute(wend, -gcd_num_units, time_zone);
} while (wend > time_data[i]);
end_data[i] = wend_latest;

View File

@ -308,7 +308,7 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink,
{
ssize_t size = length;
if (size < 0)
size += static_cast<ssize_t>(src.getElementSize()) - offset;
size += offset;
if (size > 0)
writeSlice(src.getSliceFromRight(offset, size), sink);
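A hedged reading of this fix, assuming a negative length means "stop that many elements before the end": for the from-the-right variant the bounded size should depend only on the offset, i.e.
size = length + offset, e.g. offset = 5, length = -2  =>  size = (-2) + 5 = 3
so three elements are taken starting five from the end, whereas the old expression mixed in the total element size and produced a wrong count.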

View File

@ -55,7 +55,7 @@ public:
~BitReader() = default;
// reads bits_to_read high-bits from bits_buffer
inline UInt64 readBits(UInt8 bits_to_read)
ALWAYS_INLINE inline UInt64 readBits(UInt8 bits_to_read)
{
if (bits_to_read > bits_count)
fillBitBuffer();
@ -71,7 +71,7 @@ public:
return getBitsFromBitBuffer<PEEK>(8);
}
inline UInt8 readBit()
ALWAYS_INLINE inline UInt8 readBit()
{
return static_cast<UInt8>(readBits(1));
}
@ -122,7 +122,7 @@ private:
// Fills internal bits_buffer with data from source, reads at most 64 bits
size_t fillBitBuffer()
ALWAYS_INLINE size_t fillBitBuffer()
{
const size_t available = source_end - source_current;
const auto bytes_to_read = std::min<size_t>(64 / 8, available);

View File

@ -31,7 +31,6 @@ namespace DB
// because custom S3 implementation may allow relaxed requirements on that.
const int S3_WARN_MAX_PARTS = 10000;
namespace ErrorCodes
{
extern const int S3_ERROR;

View File

@ -1194,9 +1194,18 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl(
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
{
if (inst->offsets)
inst->batch_that->addBatchSinglePlaceFromInterval(inst->offsets[row_begin], inst->offsets[row_end - 1], res + inst->state_offset, inst->batch_arguments, arena);
inst->batch_that->addBatchSinglePlaceFromInterval(
inst->offsets[static_cast<ssize_t>(row_begin) - 1],
inst->offsets[row_end - 1],
res + inst->state_offset,
inst->batch_arguments, arena);
else
inst->batch_that->addBatchSinglePlaceFromInterval(row_begin, row_end, res + inst->state_offset, inst->batch_arguments, arena);
inst->batch_that->addBatchSinglePlaceFromInterval(
row_begin,
row_end,
res + inst->state_offset,
inst->batch_arguments,
arena);
}
}

View File

@ -616,7 +616,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
for (const auto & [_, cache_data] : caches)
{
new_values["FilesystemCacheBytes"] = cache_data.cache->getUsedCacheSize();
new_values["FilesystemCacheFiles"] = cache_data.cache->getCacheFilesNum();
new_values["FilesystemCacheFiles"] = cache_data.cache->getFileSegmentsNum();
}
}

View File

@ -37,8 +37,8 @@ class IStreamFactory;
ContextMutablePtr updateSettingsForCluster(
const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log = nullptr);
/// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
/// `stream_factory` object encapsulates the logic of creating streams for a different type of query
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
/// `stream_factory` object encapsulates the logic of creating plans for a different type of query
/// (currently SELECT, DESCRIBE).
void executeQuery(
QueryPlan & query_plan,

View File

@ -0,0 +1,206 @@
#include <memory>
#include <mutex>
#include <Columns/FilterDescription.h>
#include <Columns/IColumn.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/createBlockSelector.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/parseQuery.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SET_SIZE_LIMIT_EXCEEDED;
extern const int BAD_ARGUMENTS;
}
namespace JoinStuff
{
ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_)
: context(context_)
, table_join(table_join_)
, slots(slots_)
{
if (!slots_ || slots_ >= 256)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_);
}
for (size_t i = 0; i < slots; ++i)
{
auto inner_hash_join = std::make_shared<InternalHashJoin>();
inner_hash_join->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_);
hash_joins.emplace_back(std::move(inner_hash_join));
}
}
bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits)
{
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, block);
std::list<size_t> pending_blocks;
for (size_t i = 0; i < dispatched_blocks.size(); ++i)
pending_blocks.emplace_back(i);
while (!pending_blocks.empty())
{
for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();)
{
auto & i = *iter;
auto & hash_join = hash_joins[i];
auto & dispatched_block = dispatched_blocks[i];
if (hash_join->mutex.try_lock())
{
if (!hash_join->data->addJoinedBlock(dispatched_block, check_limits))
{
hash_join->mutex.unlock();
return false;
}
hash_join->mutex.unlock();
iter = pending_blocks.erase(iter);
}
else
iter++;
}
}
if (check_limits)
return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
return true;
}
void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & /*not_processed*/)
{
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_left, block);
for (size_t i = 0; i < dispatched_blocks.size(); ++i)
{
std::shared_ptr<ExtraBlock> none_extra_block;
auto & hash_join = hash_joins[i];
auto & dispatched_block = dispatched_blocks[i];
hash_join->data->joinBlock(dispatched_block, none_extra_block);
if (none_extra_block && !none_extra_block->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty");
}
block = concatenateBlocks(dispatched_blocks);
}
void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const
{
hash_joins[0]->data->checkTypesOfKeys(block);
}
void ConcurrentHashJoin::setTotals(const Block & block)
{
if (block)
{
std::lock_guard lock(totals_mutex);
totals = block;
}
}
const Block & ConcurrentHashJoin::getTotals() const
{
return totals;
}
size_t ConcurrentHashJoin::getTotalRowCount() const
{
size_t res = 0;
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
res += hash_join->data->getTotalRowCount();
}
return res;
}
size_t ConcurrentHashJoin::getTotalByteCount() const
{
size_t res = 0;
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
res += hash_join->data->getTotalByteCount();
}
return res;
}
bool ConcurrentHashJoin::alwaysReturnsEmptySet() const
{
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
if (!hash_join->data->alwaysReturnsEmptySet())
return false;
}
return true;
}
std::shared_ptr<NotJoinedBlocks> ConcurrentHashJoin::getNonJoinedBlocks(
const Block & /*left_sample_block*/, const Block & /*result_sample_block*/, UInt64 /*max_block_size*/) const
{
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
table_join->strictness() == ASTTableJoin::Strictness::Semi ||
!isRightOrFull(table_join->kind()))
{
return {};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness());
}
Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, const Block & from_block)
{
Blocks result;
size_t num_shards = hash_joins.size();
size_t num_rows = from_block.rows();
size_t num_cols = from_block.columns();
ColumnRawPtrs key_cols;
for (const auto & key_name : key_columns_names)
{
key_cols.push_back(from_block.getByName(key_name).column.get());
}
IColumn::Selector selector(num_rows);
for (size_t i = 0; i < num_rows; ++i)
{
SipHash hash;
for (const auto & key_col : key_cols)
{
key_col->updateHashWithValue(i, hash);
}
selector[i] = hash.get64() % num_shards;
}
for (size_t i = 0; i < num_shards; ++i)
{
result.emplace_back(from_block.cloneEmpty());
}
for (size_t i = 0; i < num_cols; ++i)
{
auto dispatched_columns = from_block.getByPosition(i).column->scatter(num_shards, selector);
assert(result.size() == dispatched_columns.size());
for (size_t block_index = 0; block_index < num_shards; ++block_index)
{
result[block_index].getByPosition(i).column = std::move(dispatched_columns[block_index]);
}
}
return result;
}
}
}

View File

@ -0,0 +1,75 @@
#pragma once
#include <condition_variable>
#include <memory>
#include <optional>
#include <Core/BackgroundSchedulePool.h>
#include <Functions/FunctionsLogical.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/IJoin.h>
#include <base/defines.h>
#include <base/types.h>
#include <Common/Stopwatch.h>
namespace DB
{
namespace JoinStuff
{
/**
* Can run addJoinedBlock() in parallel to speed up the join process. In tests, it shows almost linear speedup
* with the degree of parallelism.
*
* The default HashJoin is not thread-safe for inserting the right table's rows, so it runs in a single thread.
* When the right table is large, the join process is too slow.
*
* We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
* corresponding to the HashJoin instances by hashing every row on the join keys. This guarantees that every HashJoin
* instance is written by only one thread.
*
* When it comes to matching the left table, its blocks are also split across the HashJoin instances in the same way.
* (A simplified, standalone sketch of this per-shard dispatch is shown right after this header.)
*
*/
class ConcurrentHashJoin : public IJoin
{
public:
explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false);
~ConcurrentHashJoin() override = default;
const TableJoin & getTableJoin() const override { return *table_join; }
bool addJoinedBlock(const Block & block, bool check_limits) override;
void checkTypesOfKeys(const Block & block) const override;
void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) override;
void setTotals(const Block & block) override;
const Block & getTotals() const override;
size_t getTotalRowCount() const override;
size_t getTotalByteCount() const override;
bool alwaysReturnsEmptySet() const override;
bool supportParallelJoin() const override { return true; }
std::shared_ptr<NotJoinedBlocks>
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
private:
struct InternalHashJoin
{
std::mutex mutex;
std::unique_ptr<HashJoin> data;
};
ContextPtr context;
std::shared_ptr<TableJoin> table_join;
size_t slots;
std::vector<std::shared_ptr<InternalHashJoin>> hash_joins;
std::mutex finished_add_joined_blocks_tasks_mutex;
std::condition_variable finished_add_joined_blocks_tasks_cond;
std::atomic<UInt32> finished_add_joined_blocks_tasks = 0;
mutable std::mutex totals_mutex;
Block totals;
Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block);
};
}
}
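A simplified, standalone sketch of the per-shard dispatch described in the header comment above (std::hash stands in for SipHash and plain index buckets stand in for IColumn::scatter; all names here are illustrative, not the real ClickHouse API):
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>
int main()
{
    const std::size_t num_shards = 4; // one HashJoin instance per shard
    std::vector<std::string> join_keys{"a", "b", "c", "a", "d", "b"};
    // Assign every row to a shard by hashing its join key, mirroring the selector built in dispatchBlock().
    std::vector<std::vector<std::size_t>> shards(num_shards);
    for (std::size_t row = 0; row < join_keys.size(); ++row)
        shards[std::hash<std::string>{}(join_keys[row]) % num_shards].push_back(row);
    // Rows with equal keys always land in the same shard, so each per-shard hash table
    // can be built and probed independently without missing any matches.
    for (std::size_t s = 0; s < num_shards; ++s)
        std::cout << "shard " << s << " owns " << shards[s].size() << " rows\n";
}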

View File

@ -383,7 +383,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
txn->addOp(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
}
txn->addOp(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
txn->addOp(getOpToUpdateLogPointer());
for (auto & op : ops)
txn->addOp(std::move(op));
@ -392,6 +392,11 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
return query_context;
}
Coordination::RequestPtr DatabaseReplicatedTask::getOpToUpdateLogPointer()
{
return zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1);
}
String DDLTaskBase::getLogEntryName(UInt32 log_entry_number)
{
return zkutil::getSequentialNodeName("query-", log_entry_number);

View File

@ -107,6 +107,7 @@ struct DDLTaskBase
virtual String getShardID() const = 0;
virtual ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper);
virtual Coordination::RequestPtr getOpToUpdateLogPointer() { return nullptr; }
inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; }
inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; }
@ -145,6 +146,7 @@ struct DatabaseReplicatedTask : public DDLTaskBase
String getShardID() const override;
void parseQueryFromEntry(ContextPtr context) override;
ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) override;
Coordination::RequestPtr getOpToUpdateLogPointer() override;
DatabaseReplicated * database;
};

View File

@ -180,7 +180,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
{
/// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
/// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusInputStream.
/// Otherwise, that node will be ignored by DDLQueryStatusSource.
out_reason = "Incorrect task format";
write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
return {};
@ -715,6 +715,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
if (zookeeper->exists(is_executed_path, nullptr, event))
{
LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
if (auto op = task.getOpToUpdateLogPointer())
task.ops.push_back(op);
return true;
}
@ -759,6 +761,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
{
LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by);
executed_by_other_leader = true;
if (auto op = task.getOpToUpdateLogPointer())
task.ops.push_back(op);
break;
}
@ -786,6 +790,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
{
LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
executed_by_other_leader = true;
if (auto op = task.getOpToUpdateLogPointer())
task.ops.push_back(op);
break;
}
else

View File

@ -91,7 +91,7 @@ protected:
/// Executes query only on leader replica in case of replicated table.
/// Queries like TRUNCATE/ALTER .../OPTIMIZE have to be executed only on one node of shard.
/// Most of these queries can be executed on non-leader replica, but actually they still send
/// query via RemoteBlockOutputStream to leader, so to avoid such "2-phase" query execution we
/// query via RemoteQueryExecutor to leader, so to avoid such "2-phase" query execution we
/// execute query directly on leader.
bool tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,

View File

@ -1,3 +1,4 @@
#include <memory>
#include <Core/Block.h>
#include <Parsers/ASTExpressionList.h>
@ -16,6 +17,7 @@
#include <Interpreters/ArrayJoinAction.h>
#include <Interpreters/Context.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/DictionaryReader.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/ExpressionActions.h>
@ -934,7 +936,13 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> ana
bool allow_merge_join = analyzed_join->allowMergeJoin();
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
{
if (analyzed_join->allowParallelHashJoin())
{
return std::make_shared<JoinStuff::ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, sample_block);
}
return std::make_shared<HashJoin>(analyzed_join, sample_block);
}
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
return std::make_shared<MergeJoin>(analyzed_join, sample_block);
return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);

View File

@ -13,7 +13,7 @@ bool equals(const Field & lhs, const Field & rhs);
/** Helps to implement the WITH FILL modifier of the ORDER BY clause.
* Stores a row as an array of fields and provides functions to generate the next row for filling gaps and to compare rows.
* Used in FillingBlockInputStream and in FillingTransform.
* Used in FillingTransform.
*/
class FillingRow
{

View File

@ -45,6 +45,9 @@ public:
/// Different query plan is used for such joins.
virtual bool isFilled() const { return false; }
// Whether FillingRightJoinSideTransform can run in parallel for this join.
virtual bool supportParallelJoin() const { return false; }
virtual std::shared_ptr<NotJoinedBlocks>
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
};

View File

@ -2,6 +2,7 @@
#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Columns/ColumnNullable.h>
#include <Processors/Transforms/buildPushingToViewsChain.h>
#include <DataTypes/DataTypeNullable.h>
@ -153,7 +154,18 @@ Block InterpreterInsertQuery::getSampleBlock(
return res;
}
static bool hasAggregateFunctions(const IAST * ast)
{
if (const auto * func = typeid_cast<const ASTFunction *>(ast))
if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name))
return true;
for (const auto & child : ast->children)
if (hasAggregateFunctions(child.get()))
return true;
return false;
}
/** A query that just reads all data without any complex computations or filtering.
* If we just pipe the result to INSERT, we don't have to use too many threads for reading.
*/
@ -186,7 +198,8 @@ static bool isTrivialSelect(const ASTPtr & select)
&& !select_query->groupBy()
&& !select_query->having()
&& !select_query->orderBy()
&& !select_query->limitBy());
&& !select_query->limitBy()
&& !hasAggregateFunctions(select_query));
}
/// This query is ASTSelectWithUnionQuery subquery
return false;
@ -396,7 +409,7 @@ BlockIO InterpreterInsertQuery::execute()
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
{
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
/// default column values (in AddingDefaultBlockOutputStream), so all values will be cast correctly.
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
if (input_columns[col_idx].type->isNullable() && !query_columns[col_idx].type->isNullable() && output_columns.hasDefault(query_columns[col_idx].name))
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name));
}

View File

@ -1280,7 +1280,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
query_plan.getCurrentDataStream(),
joined_plan->getCurrentDataStream(),
expressions.join,
settings.max_block_size);
settings.max_block_size,
max_streams,
analysis_result.optimize_read_in_order);
join_step->setStepDescription("JOIN");
std::vector<QueryPlanPtr> plans;

View File

@ -748,4 +748,15 @@ void TableJoin::resetToCross()
this->table_join.kind = ASTTableJoin::Kind::Cross;
}
bool TableJoin::allowParallelHashJoin() const
{
if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH)
return false;
if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner)
return false;
if (isSpecialStorage() || !oneDisjunct())
return false;
return true;
}
}

View File

@ -191,10 +191,11 @@ public:
bool allowMergeJoin() const;
bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; }
bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; }
bool allowParallelHashJoin() const;
bool forceHashJoin() const
{
/// HashJoin always used for DictJoin
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH;
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH || join_algorithm == JoinAlgorithm::PARALLEL_HASH;
}
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }

View File

@ -144,7 +144,7 @@ bool VersionMetadata::isRemovalTIDLocked() const
void VersionMetadata::setCreationTID(const TransactionID & tid, TransactionInfoContext * context)
{
/// NOTE ReplicatedMergeTreeBlockOutputStream may add one part multiple times
/// NOTE ReplicatedMergeTreeSink may add one part multiple times
assert(creation_tid.isEmpty() || creation_tid == tid);
creation_tid = tid;
if (context)

View File

@ -275,7 +275,7 @@ DDLQueryStatusSource::DDLQueryStatusSource(
, node_path(zk_node_path)
, context(context_)
, watch(CLOCK_MONOTONIC_COARSE)
, log(&Poco::Logger::get("DDLQueryStatusInputStream"))
, log(&Poco::Logger::get("DDLQueryStatusSource"))
{
auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;

View File

@ -821,7 +821,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
element.profile_counters = info.profile_counters;
/// We need to refresh the access info since dependent views might have added extra information, either during
/// creation of the view (PushingToViewsBlockOutputStream) or while executing its internal SELECT
/// creation of the view (PushingToViews chain) or while executing its internal SELECT
const auto & access_info = context_ptr->getQueryAccessInfo();
element.query_databases.insert(access_info.databases.begin(), access_info.databases.end());
element.query_tables.insert(access_info.tables.begin(), access_info.tables.end());

View File

@ -181,6 +181,10 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
else if (!disk.empty())
print_identifier(disk);
}
else if (type == Type::SYNC_DATABASE_REPLICA)
{
print_identifier(database->as<ASTIdentifier>()->name());
}
else if (type == Type::DROP_REPLICA)
{
print_drop_replica();

View File

@ -247,6 +247,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
case Type::SYNC_DATABASE_REPLICA:
{
parseQueryWithOnCluster(res, pos, expected);
if (!parseDatabaseAsAST(pos, expected, res->database))
return false;
break;

View File

@ -637,7 +637,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c
for (auto & desc : def.columns_to_aggregate)
{
if (!desc.created)
throw Exception("Logical error in SummingSortedBlockInputStream, there are no description",
throw Exception("Logical error in SummingSortedAlgorithm, there are no description",
ErrorCodes::LOGICAL_ERROR);
if (desc.is_agg_func_type)

View File

@ -131,7 +131,7 @@ IProcessor::Status IMergingTransformBase::prepare()
return Status::Finished;
}
/// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before.
/// Do not disable inputs, so they can be executed in parallel.
bool is_port_full = !output.canPush();
/// Push if has data.

View File

@ -170,8 +170,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
}
else
{
pipeline.resize(1);
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AggregatingTransform>(header, transform_params);

View File

@ -15,9 +15,10 @@ JoinStep::JoinStep(
const DataStream & left_stream_,
const DataStream & right_stream_,
JoinPtr join_,
size_t max_block_size_)
: join(std::move(join_))
, max_block_size(max_block_size_)
size_t max_block_size_,
size_t max_streams_,
bool keep_left_read_in_order_)
: join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
{
input_streams = {left_stream_, right_stream_};
output_stream = DataStream
@ -31,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
if (pipelines.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps");
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors);
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, max_streams, keep_left_read_in_order, &processors);
}
void JoinStep::describePipeline(FormatSettings & settings) const

View File

@ -16,7 +16,9 @@ public:
const DataStream & left_stream_,
const DataStream & right_stream_,
JoinPtr join_,
size_t max_block_size_);
size_t max_block_size_,
size_t max_streams_,
bool keep_left_read_in_order_);
String getName() const override { return "Join"; }
@ -29,6 +31,8 @@ public:
private:
JoinPtr join;
size_t max_block_size;
size_t max_streams;
bool keep_left_read_in_order;
Processors processors;
};

View File

@ -7,7 +7,7 @@ namespace DB
{
/** A source of blocks that reads the next block from an explicitly provided list.
* Also see OneBlockInputStream.
* Also see SourceFromSingleChunk.
*/
class BlocksListSource : public SourceWithProgress
{

View File

@ -56,7 +56,7 @@ MySQLSource::MySQLSource(
const Block & sample_block,
const StreamSettings & settings_)
: SourceWithProgress(sample_block.cloneEmpty())
, log(&Poco::Logger::get("MySQLBlockInputStream"))
, log(&Poco::Logger::get("MySQLSource"))
, connection{std::make_unique<Connection>(entry, query_str)}
, settings{std::make_unique<StreamSettings>(settings_)}
{
@ -64,10 +64,10 @@ MySQLSource::MySQLSource(
initPositionMappingFromQueryResultStructure();
}
/// For descendant MySQLWithFailoverBlockInputStream
/// For descendant MySQLWithFailoverSource
MySQLSource::MySQLSource(const Block &sample_block_, const StreamSettings & settings_)
: SourceWithProgress(sample_block_.cloneEmpty())
, log(&Poco::Logger::get("MySQLBlockInputStream"))
, log(&Poco::Logger::get("MySQLSource"))
, settings(std::make_unique<StreamSettings>(settings_))
{
description.init(sample_block_);

Some files were not shown because too many files have changed in this diff.