Merge remote-tracking branch 'origin/master' into mysql_replica_gtid_issue_4006

This commit is contained in:
BohuTANG 2020-08-19 08:04:02 +08:00
commit 26525a5eb1
105 changed files with 2197 additions and 664 deletions

View File

@ -6,6 +6,8 @@
#include <string.h>
#include <unistd.h>
#include <iostream>
namespace
{
@ -107,6 +109,8 @@ ReadlineLineReader::ReadlineLineReader(
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + strerror(errno));
rl_variable_bind("completion-ignore-case", "on");
// TODO: it doesn't work
// history_write_timestamps = 1;
}
ReadlineLineReader::~ReadlineLineReader()
@ -129,6 +133,11 @@ LineReader::InputStatus ReadlineLineReader::readOneLine(const String & prompt)
void ReadlineLineReader::addToHistory(const String & line)
{
add_history(line.c_str());
// Flush changes to the disk
// NOTE readline builds a buffer of all the lines to write, and write them in one syscall.
// Thus there is no need to lock the history file here.
write_history(history_file_path.c_str());
}
#if RL_VERSION_MAJOR >= 7

View File

@ -25,8 +25,8 @@ PEERDIR(
contrib/libs/cctz/src
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/poco/Util
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/fmt
contrib/restricted/boost
contrib/restricted/cityhash-1.0.2
@ -52,6 +52,7 @@ SRCS(
shift10.cpp
sleep.cpp
terminalColors.cpp
)
END()

View File

@ -10,6 +10,7 @@ CFLAGS (GLOBAL -DARCADIA_BUILD)
CFLAGS (GLOBAL -DUSE_CPUID=1)
CFLAGS (GLOBAL -DUSE_JEMALLOC=0)
CFLAGS (GLOBAL -DUSE_RAPIDJSON=1)
CFLAGS (GLOBAL -DUSE_SSL=1)
IF (OS_DARWIN)
CFLAGS (GLOBAL -DOS_DARWIN)
@ -24,6 +25,7 @@ PEERDIR(
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/fmt
contrib/restricted/boost
contrib/restricted/cityhash-1.0.2

View File

@ -11,41 +11,42 @@ RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 mai
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
bash \
fakeroot \
ccache \
software-properties-common \
apt-transport-https \
ca-certificates \
wget \
bash \
fakeroot \
cmake \
ccache \
llvm-10 \
clang-10 \
lld-10 \
clang-tidy-10 \
ninja-build \
gperf \
git \
tzdata \
gperf \
rename \
build-essential \
expect \
python \
python-lxml \
python-termcolor \
python-requests \
unixodbc \
qemu-user-static \
sudo \
moreutils \
curl \
brotli
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
apt-transport-https \
bash \
bash \
brotli \
build-essential \
ca-certificates \
ccache \
ccache \
clang-10 \
clang-tidy-10 \
cmake \
curl \
expect \
fakeroot \
fakeroot \
git \
gperf \
gperf \
lld-10 \
llvm-10 \
moreutils \
ninja-build \
psmisc \
python \
python-lxml \
python-requests \
python-termcolor \
qemu-user-static \
rename \
software-properties-common \
sudo \
tzdata \
unixodbc \
wget
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget --quiet -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \

View File

@ -1,12 +1,65 @@
#!/bin/bash
set -xeu
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
set -x -e
# This script is separated into two stages, cloning and everything else, so
# that we can run the "everything else" stage from the cloned source (we don't
# do this yet).
stage=${stage:-}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
# it being undefined. Also read it as array so that we can pass an empty list
# of additional variable to cmake properly, and it doesn't generate an extra
# empty parameter.
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
ls -la
function kill_clickhouse
{
for _ in {1..60}
do
if ! pkill -f clickhouse-server ; then break ; fi
sleep 1
done
if pgrep -f clickhouse-server
then
pstree -apgT
jobs
echo "Failed to kill the ClickHouse server $(pgrep -f clickhouse-server)"
return 1
fi
}
function wait_for_server_start
{
for _ in {1..60}
do
if clickhouse-client --query "select 1" || ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
if ! clickhouse-client --query "select 1"
then
echo "Failed to wait until ClickHouse server starts."
return 1
fi
echo "ClickHouse server pid '$(pgrep -f clickhouse-server)' started and responded"
}
function clone_root
{
git clone https://github.com/ClickHouse/ClickHouse.git | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/clone_log.txt
cd ClickHouse
CLICKHOUSE_DIR=`pwd`
CLICKHOUSE_DIR=$(pwd)
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
@ -15,18 +68,21 @@ if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
echo 'Clonned merge head'
else
git fetch
git checkout $COMMIT_SHA
git checkout "$COMMIT_SHA"
echo 'Checked out to commit'
fi
else
if [ "$COMMIT_SHA" != "" ]; then
git checkout $COMMIT_SHA
git checkout "$COMMIT_SHA"
fi
fi
}
SUBMODULES_TO_UPDATE="contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11"
function run
{
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11)
git submodule update --init --recursive $SUBMODULES_TO_UPDATE | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/submodule_log.txt
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/submodule_log.txt
export CMAKE_LIBS_CONFIG="-DENABLE_LIBRARIES=0 -DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_THINLTO=0 -DUSE_UNWIND=1"
@ -41,8 +97,7 @@ ccache --zero-stats ||:
mkdir build
cd build
CLICKHOUSE_BUILD_DIR=`pwd`
cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 $CMAKE_LIBS_CONFIG | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/cmake_log.txt
cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "$CMAKE_LIBS_CONFIG" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/cmake_log.txt
ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/build_log.txt
ninja install | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/install_log.txt
@ -54,8 +109,8 @@ mkdir -p /etc/clickhouse-client
mkdir -p /etc/clickhouse-server/config.d
mkdir -p /etc/clickhouse-server/users.d
ln -s /test_output /var/log/clickhouse-server
cp $CLICKHOUSE_DIR/programs/server/config.xml /etc/clickhouse-server/
cp $CLICKHOUSE_DIR/programs/server/users.xml /etc/clickhouse-server/
cp "$CLICKHOUSE_DIR/programs/server/config.xml" /etc/clickhouse-server/
cp "$CLICKHOUSE_DIR/programs/server/users.xml" /etc/clickhouse-server/
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
@ -86,21 +141,12 @@ ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-clien
# Keep original query_masking_rules.xml
ln -s --backup=simple --suffix=_original.xml /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
# Kill the server in case we are running locally and not in docker
kill_clickhouse
clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
counter=0
until clickhouse-client --query "SELECT 1"
do
sleep 0.1
if [ "$counter" -gt 1200 ]
then
break
fi
counter=$(($counter + 1))
done
wait_for_server_start
TESTS_TO_SKIP=(
parquet
@ -160,50 +206,58 @@ TESTS_TO_SKIP=(
01411_bayesian_ab_testing
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
00646_url_engine
00974_query_profiler
)
clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip ${TESTS_TO_SKIP[*]} 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt
clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt
kill_clickhouse () {
killall clickhouse-server ||:
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' /test_output/test_log.txt)
for i in {1..10}
do
if ! killall -0 clickhouse-server; then
echo "No clickhouse process"
break
else
echo "Clickhouse server process" $(pgrep -f clickhouse-server) "still alive"
sleep 10
fi
done
}
FAILED_TESTS=`grep 'FAIL\|TIMEOUT\|ERROR' /test_output/test_log.txt | awk 'BEGIN { ORS=" " }; { print substr($3, 1, length($3)-1) }'`
if [[ ! -z "$FAILED_TESTS" ]]; then
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
# running concurrently. If they fail even in seqential mode, we will report them.
# FIXME All tests that require exclusive access to the server must be
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
# run them in a separate group after all other tests. This is faster and also
# explicit instead of guessing.
if [[ -n "${FAILED_TESTS[*]}" ]]
then
kill_clickhouse
# Clean the data so that there is no interference from the previous test run.
rm -rvf /var/lib/clickhouse ||:
mkdir /var/lib/clickhouse
clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
counter=0
until clickhouse-client --query "SELECT 1"
do
sleep 0.1
if [ "$counter" -gt 1200 ]
then
break
fi
wait_for_server_start
counter=$(($counter + 1))
done
echo "Going to run again: ${FAILED_TESTS[*]}"
echo "Going to run again: $FAILED_TESTS"
clickhouse-test --no-long --testname --shard --zookeeper $FAILED_TESTS 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_output/test_log.txt
clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_output/test_log.txt
else
echo "No failed tests"
fi
}
case "$stage" in
"")
;&
"clone_root")
clone_root
# TODO bootstrap into the cloned script here. Add this on Sep 1 2020 or
# later, so that most of the old branches are updated with this code.
;&
"run")
run
;&
esac
pstree -apgT
jobs

View File

@ -15,6 +15,10 @@ function wait_server()
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
cat /var/log/clickhouse-server/stderr.log
cat /var/log/clickhouse-server/clickhouse-server.err.log
break
fi
sleep 0.5

View File

@ -31,7 +31,7 @@ For a description of request parameters, see [statement description](../../../sq
**ReplacingMergeTree Parameters**
- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` or `DateTime64`. Optional parameter.
When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one:

View File

@ -5,6 +5,6 @@ toc_title: MaterializedView
# MaterializedView Table Engine {#materializedview}
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create/table.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.
Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->

View File

@ -34,7 +34,9 @@ toc_title: Adopters
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
@ -53,6 +55,7 @@ toc_title: Adopters
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
@ -63,6 +66,7 @@ toc_title: Adopters
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |

View File

@ -8,7 +8,7 @@ toc_title: Quotas
Quotas allow you to limit resource usage over a period of time or track the use of resources.
Quotas are set up in the user config, which is usually users.xml.
The system also has a feature for limiting the complexity of a single query. See the section “Restrictions on query complexity”).
The system also has a feature for limiting the complexity of a single query. See the section [Restrictions on query complexity](../operations/settings/query-complexity.md).
In contrast to query complexity restrictions, quotas:

View File

@ -1546,6 +1546,17 @@ Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-regist
Default value: `Empty`.
## input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
Enables using fields that are not specified in [Avro](../../interfaces/formats.md#data-format-avro) or [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
## background\_pool\_size {#background_pool_size}
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and cant be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.

View File

@ -0,0 +1,85 @@
---
toc_priority: 114
---
# groupArraySample {#grouparraysample}
Creates an array of sample argument values. The size of the resulting array is limited to `max_size` elements. Argument values are selected and added to the array randomly.
**Syntax**
``` sql
groupArraySample(max_size)(x)
```
or
``` sql
groupArraySample(max_size, seed)(x)
```
**Parameters**
- `max_size` — Maximum size of the resulting array. Positive [UInt64](../../data-types/int-uint.md).
- `seed` — Seed for the random number generator. Optional, can be omitted. Positive [UInt64](../../data-types/int-uint.md). Default value: `123456`.
- `x` — Argument name. [String](../../data-types/string.md).
**Returned values**
- Array of randomly selected `x` arguments.
Type: [Array](../../data-types/array.md).
**Examples**
Consider table `colors`:
``` text
┌─id─┬─color──┐
│ 1 │ red │
│ 2 │ blue │
│ 3 │ green │
│ 4 │ white │
│ 5 │ orange │
└────┴────────┘
```
Select `id`-s query:
``` sql
SELECT groupArraySample(3)(id) FROM colors;
```
Result:
``` text
┌─groupArraySample(3)(id)─┐
│ [1,2,4] │
└─────────────────────────┘
```
Select `color`-s query:
``` sql
SELECT groupArraySample(3)(color) FROM colors;
```
Result:
```text
┌─groupArraySample(3)(color)─┐
│ ['white','blue','green'] │
└────────────────────────────┘
```
Select `color`-s query with different seed:
``` sql
SELECT groupArraySample(3, 987654321)(color) FROM colors;
```
Result:
```text
┌─groupArraySample(3, 987654321)(color)─┐
│ ['red','orange','green'] │
└───────────────────────────────────────┘
```

View File

@ -33,7 +33,7 @@ Para obtener una descripción de los parámetros de solicitud, consulte [descrip
**ReplacingMergeTree Parámetros**
- `ver` — column with version. Type `UInt*`, `Date` o `DateTime`. Parámetro opcional.
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` o `DateTime64`. Parámetro opcional.
Al fusionar, `ReplacingMergeTree` de todas las filas con la misma clave primaria deja solo una:

View File

@ -33,7 +33,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**پارامترهای جایگزین**
- `ver` — column with version. Type `UInt*`, `Date` یا `DateTime`. پارامتر اختیاری.
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` یا `DateTime64`. پارامتر اختیاری.
هنگام ادغام, `ReplacingMergeTree` از تمام ردیف ها با همان کلید اصلی تنها یک برگ دارد:

View File

@ -33,7 +33,7 @@ Pour une description des paramètres de requête, voir [demande de description](
**ReplacingMergeTree Paramètres**
- `ver` — column with version. Type `UInt*`, `Date` ou `DateTime`. Paramètre facultatif.
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` ou `DateTime64`. Paramètre facultatif.
Lors de la fusion, `ReplacingMergeTree` de toutes les lignes avec la même clé primaire ne laisse qu'un:

View File

@ -33,7 +33,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**ReplacingMergeTreeパラメータ**
- `ver` — column with version. Type `UInt*`, `Date` または `DateTime`. 任意パラメータ。
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` または `DateTime64`. 任意パラメータ。
マージ時, `ReplacingMergeTree` 同じ主キーを持つすべての行から、一つだけを残します:

View File

@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**Параметры ReplacingMergeTree**
- `ver` — столбец с версией, тип `UInt*`, `Date` или `DateTime`. Необязательный параметр.
- `ver` — столбец с версией, тип `UInt*`, `Date`, `DateTime` или `DateTime64`. Необязательный параметр.
При слиянии, из всех строк с одинаковым значением ключа сортировки `ReplacingMergeTree` оставляет только одну:

View File

@ -1549,6 +1549,16 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
Значение по умолчанию: `Пустая строка`.
## input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
Позволяет использовать данные, которых не нашлось в схеме формата [Avro](../../interfaces/formats.md#data-format-avro) или [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent). Если поле не найдено в схеме, ClickHouse подставит значение по умолчанию вместо исключения.
Возможные значения:
- 0 — Выключена.
- 1 — Включена.
Значение по умолчанию: `0`.
## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views}
Устанавливает минимальное количество строк в блоке, который может быть вставлен в таблицу запросом `INSERT`. Блоки меньшего размера склеиваются в блоки большего размера. Настройка применяется только для блоков, вставляемых в [материализованное представление](../../sql-reference/statements/create/view.md#create-view). Настройка позволяет избежать избыточного потребления памяти.

View File

@ -964,7 +964,7 @@ SELECT flatten([[[1]], [[2], [3]]])
## arrayCompact {#arraycompact}
Удаляет дубликаты из массива. Порядок результирующих значений определяется порядком в исходном массиве.
Удаляет последовательно повторяющиеся элементы из массива. Порядок результирующих значений определяется порядком в исходном массиве.
**Синтаксис**
@ -978,7 +978,7 @@ arrayCompact(arr)
**Возвращаемое значение**
Массив без дубликатов.
Массив без последовательных дубликатов.
Тип: `Array`.

View File

@ -637,7 +637,7 @@ Upd. Готово (все директории кроме contrib).
Требует 7.26. Коллеги начали делать, есть результат.
Upd. В Аркадии частично работает небольшая часть тестов. И этого достаточно.
### 7.29. Опции clickhouse install, stop, start вместо postinst, init.d, systemd скриптов {#optsii-clickhouse-install-stop-start-vmesto-postinst-init-d-systemd-skriptov}
### 7.29. + Опции clickhouse install, stop, start вместо postinst, init.d, systemd скриптов {#optsii-clickhouse-install-stop-start-vmesto-postinst-init-d-systemd-skriptov}
Низкий приоритет.
@ -786,7 +786,7 @@ Upd. Готово.
Павел Круглов, ВШЭ и Яндекс.
Есть pull request. Готово.
### 8.17. ClickHouse как MySQL реплика {#clickhouse-kak-mysql-replika}
### 8.17. + ClickHouse как MySQL реплика {#clickhouse-kak-mysql-replika}
Задачу делает BohuTANG.
@ -1447,11 +1447,11 @@ Upd. Возможно будет отложено на следующий год
Василий Морозов, Арслан Гумеров, Альберт Кидрачев, ВШЭ.
В прошлом году задачу начинал делать другой человек, но не добился достаточного прогресса.
+ 1. Оптимизация top sort.
\+ 1. Оптимизация top sort.
В ClickHouse используется неоптимальный вариант top sort. Суть его в том, что из каждого блока достаётся top N записей, а затем, все блоки мержатся. Но доставание top N записей у каждого следующего блока бессмысленно, если мы знаем, что из них в глобальный top N войдёт меньше. Конечно нужно реализовать вариацию на тему priority queue (heap) с быстрым пропуском целых блоков, если ни одна строка не попадёт в накопленный top.
+ 2. Рекурсивный вариант сортировки по кортежам.
\+ 2. Рекурсивный вариант сортировки по кортежам.
Для сортировки по кортежам используется обычная сортировка с компаратором, который в цикле по элементам кортежа делает виртуальные вызовы `IColumn::compareAt`. Это неоптимально - как из-за короткого цикла по неизвестному в compile-time количеству элементов, так и из-за виртуальных вызовов. Чтобы обойтись без виртуальных вызовов, есть метод `IColumn::getPermutation`. Он используется в случае сортировки по одному столбцу. Есть вариант, что в случае сортировки по кортежу, что-то похожее тоже можно применить… например, сделать метод `updatePermutation`, принимающий аргументы offset и limit, и допереставляющий перестановку в диапазоне значений, в которых предыдущий столбец имел равные значения.
@ -1583,8 +1583,8 @@ Upd. Готово.
После 10.14.
[\#7237](https://github.com/ClickHouse/ClickHouse/issues/7237)
[\#2655](https://github.com/ClickHouse/ClickHouse/issues/2655)
[#7237](https://github.com/ClickHouse/ClickHouse/issues/7237)
[#2655](https://github.com/ClickHouse/ClickHouse/issues/2655)
### 22.23. Правильная обработка Nullable в функциях, которые кидают исключение на default значении: modulo, intDiv {#pravilnaia-obrabotka-nullable-v-funktsiiakh-kotorye-kidaiut-iskliuchenie-na-default-znachenii-modulo-intdiv}
@ -1598,7 +1598,7 @@ Upd. Готово.
### 22.26. Плохая производительность quantileTDigest {#plokhaia-proizvoditelnost-quantiletdigest}
[\#2668](https://github.com/ClickHouse/ClickHouse/issues/2668)
[#2668](https://github.com/ClickHouse/ClickHouse/issues/2668)
Алексей Миловидов или будет переназначено.

View File

@ -33,7 +33,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**ReplacingMergeTree Parametreleri**
- `ver` — column with version. Type `UInt*`, `Date` veya `DateTime`. İsteğe bağlı parametre.
- `ver` — column with version. Type `UInt*`, `Date`, `DateTime` veya `DateTime64`. İsteğe bağlı parametre.
Birleş whenirken, `ReplacingMergeTree` aynı birincil anahtara sahip tüm satırlardan sadece bir tane bırakır:

View File

@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**参数**
- `ver` — 版本列。类型为 `UInt*`, `Date``DateTime`。可选参数。
- `ver` — 版本列。类型为 `UInt*`, `Date`, `DateTime``DateTime64`。可选参数。
合并的时候,`ReplacingMergeTree` 从所有具有相同主键的行中选择一行留下:
- 如果 `ver` 列未指定,选择最后一条。

View File

@ -1,14 +1,12 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: ARRAY JOIN
---
# ARRAY JOIN子句 {#select-array-join-clause}
对于包含数组列的表来说,这是一种常见的操作,用于生成一个新表,该表具有包含该初始列的每个单独数组元素的列,而其他列的值将被重复。 这是什么基本情况 `ARRAY JOIN` 子句有
对于包含数组列的表来说是一种常见的操作,用于生成一个新表,该表具有包含该初始列的每个单独数组元素的列,而其他列的值将被重复显示。 这是 `ARRAY JOIN` 语句最基本的场景。
它的名字来自这样一个事实,即它可以被视为执行 `JOIN` 具有数组或嵌套数据结构。 意图类似于 [arrayJoin](../../../sql-reference/functions/array-join.md#functions_arrayjoin) 功能,但该子句功能更广泛。
它可以被视为执行 `JOIN` 具有数组或嵌套数据结构。 类似于 [arrayJoin](../../../sql-reference/functions/array-join.md#functions_arrayjoin) 功能,但该子句功能更广泛。
语法:
@ -20,16 +18,16 @@ FROM <left_subquery>
...
```
您只能指定一个 `ARRAY JOIN` a中的条款 `SELECT` 查询
您只能`SELECT` 查询指定一个 `ARRAY JOIN`
支持的类型 `ARRAY JOIN` 下面列出:
`ARRAY JOIN` 支持的类型有:
- `ARRAY JOIN` -在基本情况下,空数组不包括在结果中 `JOIN`.
- `LEFT ARRAY JOIN` -的结果 `JOIN` 包含具有空数组的行。 空数组的值设置为数组元素类型的默认值通常为0、空字符串或NULL
- `ARRAY JOIN` - 一般情况下,空数组不包括在结果中 `JOIN`.
- `LEFT ARRAY JOIN` - 的结果 `JOIN` 包含具有空数组的行。 空数组的值设置为数组元素类型的默认值通常为0、空字符串或NULL
## 基本数组连接示例 {#basic-array-join-examples}
## 基本 ARRAY JOIN 示例 {#basic-array-join-examples}
下面的例子演示的用法 `ARRAY JOIN``LEFT ARRAY JOIN` 条款 让我们创建一个表 [阵列](../../../sql-reference/data-types/array.md) 键入column并在其中插入值:
下面的例子展示 `ARRAY JOIN``LEFT ARRAY JOIN` 的用法,让我们创建一个表包含一个 [Array](../../../sql-reference/data-types/array.md) 的列并插入值:
``` sql
CREATE TABLE arrays_test
@ -50,7 +48,7 @@ VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []);
└─────────────┴─────────┘
```
下面的例子使用 `ARRAY JOIN` 条款:
下面的例子使用 `ARRAY JOIN` 子句:
``` sql
SELECT s, arr
@ -68,7 +66,7 @@ ARRAY JOIN arr;
└───────┴─────┘
```
下一个示例使用 `LEFT ARRAY JOIN` 条款:
下一个示例使用 `LEFT ARRAY JOIN` 子句:
``` sql
SELECT s, arr
@ -89,7 +87,7 @@ LEFT ARRAY JOIN arr;
## 使用别名 {#using-aliases}
可以为数组中的别名指定 `ARRAY JOIN` 条款 在这种情况下,数组项目可以通过此别名访问,但数组本身可以通过原始名称访问。 示例:
在使用`ARRAY JOIN` 时可以为数组指定别名,数组元素可以通过此别名访问,但数组本身则通过原始名称访问。 示例:
``` sql
SELECT s, arr, a
@ -107,7 +105,7 @@ ARRAY JOIN arr AS a;
└───────┴─────────┴───┘
```
使用别名,您可以执行 `ARRAY JOIN` 与外部阵列。 例如:
可以使用别名与外部数组执行 `ARRAY JOIN` 。 例如:
``` sql
SELECT s, arr_external
@ -129,7 +127,7 @@ ARRAY JOIN [1, 2, 3] AS arr_external;
└─────────────┴──────────────┘
```
多个数组可以在逗号分隔 `ARRAY JOIN` 条款 在这种情况下, `JOIN` 与它们同时执行(直接和,而不是笛卡尔积)。 请注意,所有数组必须具有相同的大小。 示例:
`ARRAY JOIN` 中,多个数组可以用逗号分隔, 在这例子中 `JOIN` 与它们同时执行直接sum,而不是笛卡尔积)。 请注意,所有数组必须具有相同的大小。 示例:
``` sql
SELECT s, arr, a, num, mapped
@ -242,7 +240,7 @@ ARRAY JOIN `nest.x`;
└───────┴────────┴────────────┘
```
可以将别名用于嵌套数据结构,以便选择 `JOIN` 结果或源数组。 例:
可以将别名用于嵌套数据结构,以便选择 `JOIN` 结果或源数组。 例:
``` sql
SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`
@ -260,7 +258,7 @@ ARRAY JOIN nest AS n;
└───────┴─────┴─────┴─────────┴────────────┘
```
使用的例子 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 功能:
使用功能 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 的例子:
``` sql
SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num
@ -278,6 +276,6 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num;
└───────┴─────┴─────┴─────────┴────────────┴─────┘
```
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
运行时优化查询执行顺序 `ARRAY JOIN`. 虽然 `ARRAY JOIN` 必须始终之前指定 [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) 子句中的查询,从技术上讲,它们可以以任何顺序执行,除非结果 `ARRAY JOIN` 用于过滤。 处理顺序由查询优化器控制。

View File

@ -1,32 +1,30 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: DISTINCT
---
# DISTINCT子句 {#select-distinct}
如果 `SELECT DISTINCT` 如果指定,则查询结果中只保留唯一行。 因此,在结果中所有完全匹配的行集合中,只有一行保留。
如果 `SELECT DISTINCT` 被声明,则查询结果中只保留唯一行。 因此,在结果中所有完全匹配的行集合中,只有一行保留。
## 空处理 {#null-processing}
`DISTINCT` 适用于 [NULL](../../../sql-reference/syntax.md#null-literal) 就好像 `NULL` 是一个特定的值,并且 `NULL==NULL`. 换句话说,在 `DISTINCT` 结果,不同的组合 `NULL` 仅发生一次。 它不同于 `NULL` 在大多数其他上下文中进行处理
`DISTINCT` 适用于 [NULL](../../../sql-reference/syntax.md#null-literal) 就好像 `NULL` 是一个特定的值,并且 `NULL==NULL`. 换句话说,在 `DISTINCT` 结果,不同的组合 `NULL` 仅发生一次。 它不同于 `NULL` 在大多数其他情况中的处理方式
## 替代办法 {#alternatives}
通过应用可以获得相同的结果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 在同一组值指定为 `SELECT` 子句,而不使用任何聚合函数。 但有几个区别 `GROUP BY` 方法:
通过应用可以获得相同的结果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 在同一组值指定为 `SELECT` 子句,并且不使用任何聚合函数。 但与 `GROUP BY` 有几个不同的地方:
- `DISTINCT` 可以一起应用 `GROUP BY`.
- 当 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 省略和 [LIMIT](../../../sql-reference/statements/select/limit.md) 定义时,查询在读取所需数量的不同行后立即停止运行。
- `DISTINCT` 可以`GROUP BY` 一起使用.
- 当 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 被省略并且 [LIMIT](../../../sql-reference/statements/select/limit.md) 定义时,在读取所需数量的不同行后立即停止运行。
- 数据块在处理时输出,而无需等待整个查询完成运行。
## 限制 {#limitations}
`DISTINCT` 如果不支持 `SELECT` 具有至少一个数组列。
`DISTINCT` 不支持当 `SELECT` 包含有数组的列。
## 例 {#examples}
## 例 {#examples}
ClickHouse支持使用 `DISTINCT``ORDER BY` 一个查询中不同列的子句。 该 `DISTINCT` 子句之前执行 `ORDER BY` 条款
ClickHouse支持使用 `DISTINCT``ORDER BY` 在一个查询中的不同的列。 `DISTINCT` 子句在 `ORDER BY` 子句前被执行。
示例表:
@ -39,7 +37,7 @@ ClickHouse支持使用 `DISTINCT` 和 `ORDER BY` 一个查询中不同列的子
└───┴───┘
```
与选择数据 `SELECT DISTINCT a FROM t1 ORDER BY b ASC` 查询,我们得到以下结果:
执行 `SELECT DISTINCT a FROM t1 ORDER BY b ASC` 查询数据,我们得到以下结果:
``` text
┌─a─┐
@ -59,6 +57,6 @@ ClickHouse支持使用 `DISTINCT` 和 `ORDER BY` 一个查询中不同列的子
└───┘
```
`2, 4` 分拣前被切割。
`2, 4` 排序前被切割。
在编程查询时考虑这种实现特性。
在编程查询时考虑这种实现特性。

View File

@ -1,19 +1,17 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: FORMAT
---
# 格式子句 {#format-clause}
# 格式子句 {#format-clause}
ClickHouse支持广泛的 [序列化格式](../../../interfaces/formats.md) 可用于查询结果等。 有多种方法可以选择以下格式 `SELECT` 输出,其中之一是指定 `FORMAT format` 在查询结束时以任何特定格式获取结果数据
ClickHouse支持广泛的 [序列化格式](../../../interfaces/formats.md) 可用于查询结果等。 有多种方法可以选择格式 `SELECT` 输出,其中之一是指定 `FORMAT format` 在查询结束时以任何特定格式获取结果
特定的格式可以用于方便使用,与其他系统集成或性能增益
特定的格式方便使用,与其他系统集成或增强性能。
## 默认格式 {#default-format}
如果 `FORMAT` 省略子句,使用默认格式这取决于用于访问ClickHouse服务器的设置和接口。 为 [HTTP接口](../../../interfaces/http.md) 和 [命令行客户端](../../../interfaces/cli.md) 在批处理模式下,默认格式为 `TabSeparated`. 对于交互模式下的命令行客户端,默认格式为 `PrettyCompact` (它生成紧凑的人类可读表)。
如果 `FORMAT` 被省略则使用默认格式这取决于用于访问ClickHouse服务器的设置和接口。 为 [HTTP接口](../../../interfaces/http.md) 和 [命令行客户端](../../../interfaces/cli.md) 在批处理模式下,默认格式为 `TabSeparated`. 对于交互模式下的命令行客户端,默认格式为 `PrettyCompact` (它生成紧凑的人类可读表)。
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
使用命令行客户端时,数据始终以内部高效格式通过网络传递 (`Native`). 客户端独立解释 `FORMAT` 查询子句并格式化数据本身(从而减轻网络和服务器的额外负载)。
使用命令行客户端时,数据始终以内部高效格式通过网络传递 (`Native`). 客户端独立解释 `FORMAT` 查询子句并格式化数据本身(以减轻网络和服务器的额外负担)。

View File

@ -1,31 +1,29 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: FROM
---
# FROM条款 {#select-from}
# FROM子句 {#select-from}
`FROM` 子句指定从中读取数据的源:
`FROM` 子句指定从以下数据源中读取数据:
- [](../../../engines/table-engines/index.md)
- [子查询](../../../sql-reference/statements/select/index.md) {## TODO: better link ##}
- [表函数](../../../sql-reference/table-functions/index.md#table-functions)
[JOIN](../../../sql-reference/statements/select/join.md) 和 [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) 子句也可以用来扩展的功能 `FROM` 条款
[JOIN](../../../sql-reference/statements/select/join.md) 和 [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) 子句也可以用来扩展 `FROM` 的功能
子查询是另一个 `SELECT` 可以在括号内指定的查询 `FROM` 条款
子查询是另一个 `SELECT` 可以指定在 `FROM` 后的括号内的查询。
`FROM` 子句可以包含多个数据源,用逗号分隔,这相当于执行 [CROSS JOIN](../../../sql-reference/statements/select/join.md) 在他们身上
`FROM` 子句可以包含多个数据源,用逗号分隔,这相当于在他们身上执行 [CROSS JOIN](../../../sql-reference/statements/select/join.md)
## 最终修饰符 {#select-from-final}
## FINAL 修饰符 {#select-from-final}
`FINAL` 如果指定ClickHouse会在返回结果之前完全合并数据从而执行给定表引擎合并期间发生的所有数据转换。
`FINAL` 指定ClickHouse会在返回结果之前完全合并数据从而执行给定表引擎合并期间发生的所有数据转换。
它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-发动机系列(除了 `GraphiteMergeTree`). 还支持:
它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族(除了 `GraphiteMergeTree`). 还支持:
- [复制](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎
- [查看](../../../engines/table-engines/special/view.md), [缓冲区](../../../engines/table-engines/special/buffer.md), [分布](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要它们是在创建 `MergeTree`-发动机表
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可
### 缺点 {#drawbacks}
@ -36,10 +34,10 @@ toc_title: FROM
**在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##}
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
如果 `FROM` 子句被省略,数据将从读取 `system.one` 桌子
`system.one` 表只包含一行(此表满足与其他Dbms中找到的双表相同的目的)。
如果 `FROM` 子句被省略,数据将从读取 `system.one` 表。
`system.one` 表只包含一行(此表满足与其他 DBMS 中的 DUAL 表有相同的作用)。
若要执行查询,将从相应的表中提取查询中列出的所有列。 外部查询不需要的任何列都将从子查询中抛出。
如果查询未列出任何列(例如, `SELECT count() FROM t`),无论如何都会从表中提取一些列(最小的列是首选),以便计算行数。
如果查询未列出任何列(例如, `SELECT count() FROM t`),无论如何都会从表中提取一些列(首选是最小的列),以便计算行数。

View File

@ -1,27 +1,25 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: GROUP BY
---
# GROUP BY子句 {#select-group-by-clause}
`GROUP BY` 子句切换 `SELECT` 查询转换为聚合模式,其工作原理如下:
`GROUP BY` 子句`SELECT` 查询结果转换为聚合模式,其工作原理如下:
- `GROUP BY` 子句包含表达式列表(或单个表达式,其被认为是长度为1的列表。 这份名单充当 “grouping key”而每个单独的表达式将被称为 “key expressions”.
- 在所有的表达式 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 条款 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。
- `GROUP BY` 子句包含表达式列表(或单个表达式 -- 可以认为是长度为1的列表。 这份名单充当 “grouping key”而每个单独的表达式将被称为 “key expressions”.
- 在所有的表达式 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句中 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。
- 聚合结果 `SELECT` 查询将包含尽可能多的行,因为有唯一值 “grouping key” 在源表中。 通常这会显着减少行数,通常是数量级,但不一定:如果所有行数保持不变 “grouping key” 值是不同的。
!!! note "注"
还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY clause` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。
还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。
## 空处理 {#null-processing}
对于分组ClickHouse解释 [NULL](../../../sql-reference/syntax.md#null-literal) 作为一个值,并且 `NULL==NULL`. 它不同于 `NULL` 在大多数其他上下文中进行处理
对于分组ClickHouse解释 [NULL](../../../sql-reference/syntax.md#null-literal) 作为一个值,并且 `NULL==NULL`. 它不同于 `NULL` 在大多数其他上下文中的处理方式
这里有一个例子来说明这意味着什么。
假设你有这张桌子:
假设你有一张表:
``` text
┌─x─┬────y─┐
@ -47,20 +45,20 @@ toc_title: GROUP BY
如果你通过几个键 `GROUP BY`,结果会给你选择的所有组合,就好像 `NULL` 是一个特定的值。
## 使用总计修饰符 {#with-totals-modifier}
## WITH TOTAL 修饰符 {#with-totals-modifier}
如果 `WITH TOTALS` 指定修饰符,将计算另一行。 此行将具有包含默认值(零或空行)的关键列,以及包含跨所有行计算值的聚合函数列( “total” 值)。
如果 `WITH TOTALS` 指定,将计算另一行。 此行将具有包含默认值(零或空行)的关键列,以及包含跨所有行计算值的聚合函数列( “total” 值)。
这个额外的行仅产生于 `JSON*`, `TabSeparated*`,和 `Pretty*` 格式,与其他行分开:
- 在 `JSON*` 格式,这一行是作为一个单独的输出 totals
- 在 `JSON*` 格式,这一行是作为一个单独的输出 totals 字段
- 在 `TabSeparated*` 格式,该行位于主结果之后,前面有一个空行(在其他数据之后)。
- 在 `Pretty*` 格式时,该行在主结果之后作为单独的表输出。
- 在其他格式中,它不可用。
`WITH TOTALS` 可以以不同的方式运行时 [HAVING](../../../sql-reference/statements/select/having.md) 是存在的。 该行为取决于 `totals_mode` 设置。
### 配置合计处理 {#configuring-totals-processing}
### 配置总和处理 {#configuring-totals-processing}
默认情况下, `totals_mode = 'before_having'`. 在这种情况下, totals 是跨所有行计算,包括那些不通过具有和 `max_rows_to_group_by`.
@ -78,7 +76,7 @@ toc_title: GROUP BY
您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。
## 例 {#examples}
## 例 {#examples}
示例:
@ -90,7 +88,7 @@ SELECT
FROM hits
```
但是与标准SQL相比如果表没有任何行根本没有任何行或者在使用WHERE to filter之后没有任何行),则返回一个空结果,而不是来自包含聚合函数初始值的行
但是与标准SQL相比如果表没有任何行根本没有任何行或者使用 WHERE 过滤之后没有任何行),则返回一个空结果,而不是来自包含聚合函数初始值的行
相对于MySQL并且符合标准SQL您无法获取不在键或聚合函数常量表达式除外中的某些列的某些值。 要解决此问题,您可以使用 any 聚合函数(获取第一个遇到的值)或 min/max.
@ -111,9 +109,9 @@ GROUP BY domain
不能将常量指定为聚合函数的参数。 示例: `sum(1)`. 相反,你可以摆脱常数。 示例: `count()`.
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
聚合是面向列的DBMS最重要的功能之一因此它的实现是ClickHouse中最优化的部分之一。 默认情况下,聚合使用哈希表在内存中完成。 它有40+的专业化是自动选择取决于 “grouping key” 数据类型。
聚合是面向列的 DBMS 最重要的功能之一因此它的实现是ClickHouse中最优化的部分之一。 默认情况下,聚合使用哈希表在内存中完成。 它有 40+ 的特殊化自动选择取决于 “grouping key” 数据类型。
### 在外部存储器中分组 {#select-group-by-in-external-memory}
@ -122,12 +120,12 @@ GROUP BY domain
使用时 `max_bytes_before_external_group_by`,我们建议您设置 `max_memory_usage` 大约两倍高。 这是必要的因为聚合有两个阶段读取数据和形成中间数据1和合并中间数据2。 将数据转储到文件系统只能在阶段1中发生。 如果未转储临时数据则阶段2可能需要与阶段1相同的内存量。
例如,如果 [max\_memory\_usage](../../../operations/settings/settings.md#settings_max_memory_usage) 设置为10000000000你想使用外部聚合这是有意义的设置 `max_bytes_before_external_group_by` 到10000000000`max_memory_usage` 到200亿。 当触发外部聚合如果至少有一个临时数据转储RAM的最大消耗仅略高于 `max_bytes_before_external_group_by`.
例如,如果 [max\_memory\_usage](../../../operations/settings/settings.md#settings_max_memory_usage) 设置为10000000000你想使用外部聚合这是有意义的设置 `max_bytes_before_external_group_by` 到10000000000`max_memory_usage` 到20000000000。 当触发外部聚合如果至少有一个临时数据转储RAM的最大消耗仅略高于 `max_bytes_before_external_group_by`.
通过分布式查询处理,在远程服务器上执行外部聚合。 为了使请求者服务器只使用少量的RAM设置 `distributed_aggregation_memory_efficient` 到1。
当合并数据刷新到磁盘时,以及当合并来自远程服务器的结果时, `distributed_aggregation_memory_efficient` 设置被启用,消耗高达 `1/256 * the_number_of_threads` 从RAM的总量。
当启用外部聚合时,如果有小于 `max_bytes_before_external_group_by` of data (i.e. data was not flushed), the query runs just as fast as without external aggregation. If any temporary data was flushed, the run time will be several times longer (approximately three times).
当启用外部聚合时,如果数据量小于 `max_bytes_before_external_group_by` (例如数据没有被 flushed), 查询执行速度和不在外部聚合的速度一样快. 如果临时数据被flushed到外部存储, 执行的速度会慢几倍 (大概是三倍).
如果你有一个 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 用一个 [LIMIT](../../../sql-reference/statements/select/limit.md) 后 `GROUP BY`然后使用的RAM的量取决于数据的量 `LIMIT`,不是在整个表。 但如果 `ORDER BY` 没有 `LIMIT`,不要忘记启用外部排序 (`max_bytes_before_external_sort`).

View File

@ -1,15 +1,13 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: HAVING
---
# 有条款 {#having-clause}
# HAVING 子句 {#having-clause}
允许过滤由以下方式生成的聚合结果 [GROUP BY](../../../sql-reference/statements/select/group-by.md). 它类似于 [WHERE](../../../sql-reference/statements/select/where.md) 条款,但不同的是 `WHERE` 在聚合之前执行,而 `HAVING` 之后进行。
允许过滤由 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 生成的聚合结果. 它类似于 [WHERE](../../../sql-reference/statements/select/where.md) ,但不同的是 `WHERE` 在聚合之前执行,而 `HAVING` 之后进行。
可以从以下引用聚合结果 `SELECT` 中的条款 `HAVING` 子句由他们的化名。 或者, `HAVING` 子句可以筛选查询结果中未返回的其他聚合的结果。
可以从 `SELECT` 生成的聚合结果中通过他们的别名来执行 `HAVING` 子句。 或者 `HAVING` 子句可以筛选查询结果中未返回的其他聚合的结果。
## 限制 {#limitations}
`HAVING` 如果不执行聚合则无法使用。 使用 `WHERE` 相反。
`HAVING` 如果不执行聚合则无法使用。 使用 `WHERE` 相反。

View File

@ -1,15 +1,13 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
title: SELECT Query
toc_folder_title: SELECT
toc_priority: 33
toc_title: "\u6982\u8FF0"
toc_title: 综述
---
# 选择查询 {#select-queries-syntax}
`SELECT` 查询执行数据检索。 默认情况下,请求的数据返回给客户端,同时 [INSERT INTO](../../../sql-reference/statements/insert-into.md) 可以被转发到不同的表。
`SELECT` 查询执行数据检索。 默认情况下,请求的数据返回给客户端,同时结合 [INSERT INTO](../../../sql-reference/statements/insert-into.md) 可以被转发到不同的表。
## 语法 {#syntax}
@ -32,45 +30,45 @@ SELECT [DISTINCT] expr_list
[FORMAT format]
```
所有子句都是可选的,但紧接在后面的必需表达式列表除外 `SELECT` 这是更详细的复盖 [下面](#select-clause).
所有子句都是可选的,但紧接在 `SELECT` 后面的必需表达式列表除外,更详细的请看 [下面](#select-clause).
每个可选子句的具体内容在单独的部分中进行介绍,这些部分按与执行顺序相同的顺序列出:
每个可选子句的具体内容在单独的部分中进行介绍,这些部分按与执行顺序相同的顺序列出:
- [WITH条款](../../../sql-reference/statements/select/with.md)
- [FROM条款](../../../sql-reference/statements/select/from.md)
- [示例子句](../../../sql-reference/statements/select/sample.md)
- [JOIN子句](../../../sql-reference/statements/select/join.md)
- [PREWHERE条款](../../../sql-reference/statements/select/prewhere.md)
- [WHERE条款](../../../sql-reference/statements/select/where.md)
- [GROUP BY子句](../../../sql-reference/statements/select/group-by.md)
- [限制条款](../../../sql-reference/statements/select/limit-by.md)
- [有条款](../../../sql-reference/statements/select/having.md)
- [SELECT子句](#select-clause)
- [DISTINCT子句](../../../sql-reference/statements/select/distinct.md)
- [限制条款](../../../sql-reference/statements/select/limit.md)
- [UNION ALL条款](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE条款](../../../sql-reference/statements/select/into-outfile.md)
- [格式子句](../../../sql-reference/statements/select/format.md)
- [WITH 子句](../../../sql-reference/statements/select/with.md)
- [FROM 子句](../../../sql-reference/statements/select/from.md)
- [SAMPLE 子句](../../../sql-reference/statements/select/sample.md)
- [JOIN 子句](../../../sql-reference/statements/select/join.md)
- [PREWHERE 子句](../../../sql-reference/statements/select/prewhere.md)
- [WHERE 子句](../../../sql-reference/statements/select/where.md)
- [GROUP BY 子句](../../../sql-reference/statements/select/group-by.md)
- [LIMIT BY 子句](../../../sql-reference/statements/select/limit-by.md)
- [HAVING 子句](../../../sql-reference/statements/select/having.md)
- [SELECT 子句](#select-clause)
- [DISTINCT 子句](../../../sql-reference/statements/select/distinct.md)
- [LIMIT 子句](../../../sql-reference/statements/select/limit.md)
- [UNION ALL 子句](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE 子句](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT 子句](../../../sql-reference/statements/select/format.md)
## SELECT子句 {#select-clause}
## SELECT 子句 {#select-clause}
[表达式](../../../sql-reference/syntax.md#syntax-expressions) 指定 `SELECT` 子句是在上述子句中的所有操作完成后计算的。 这些表达式的工作方式就好像它们应用于结果中的单独行一样。 如果表达式 `SELECT` 子句包含聚合函数然后ClickHouse处理过程中用作其参数的聚合函数和表达式 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 聚合。
[表达式](../../../sql-reference/syntax.md#syntax-expressions) 指定 `SELECT` 子句是在上述子句中的所有操作完成后计算的。 这些表达式的工作方式就好像它们应用于结果中的单独行一样。 如果表达式 `SELECT` 子句包含聚合函数然后ClickHouse将使用 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 聚合参数应用在聚合函数和表达式上
如果在结果中包含所有列,请使用星号 (`*`)符号。 例如, `SELECT * FROM ...`.
如果在结果中包含所有列,请使用星号 (`*`)符号。 例如, `SELECT * FROM ...`.
将结果中的某些列与 [re2](https://en.wikipedia.org/wiki/RE2_(software)) 正则表达式,可以使用 `COLUMNS` 表达。
将结果中的某些列与 [re2](https://en.wikipedia.org/wiki/RE2_(software)) 正则表达式匹配,可以使用 `COLUMNS` 表达。
``` sql
COLUMNS('regexp')
```
例如,考虑表:
例如表:
``` sql
CREATE TABLE default.col_names (aa Int8, ab Int8, bc Int8) ENGINE = TinyLog
```
以下查询从包含以下内容的所有列中选择数据 `a` 在他们的名字符号
以下查询所有列名包含 `a`
``` sql
SELECT COLUMNS('a') FROM col_names
@ -84,7 +82,7 @@ SELECT COLUMNS('a') FROM col_names
所选列不按字母顺序返回。
您可以使用多个 `COLUMNS` 查询中的表达式并将函数应用于它们。
您可以使用多个 `COLUMNS` 表达式并将函数应用于它们。
例如:
@ -98,7 +96,7 @@ SELECT COLUMNS('a'), COLUMNS('c'), toTypeName(COLUMNS('c')) FROM col_names
└────┴────┴────┴────────────────┘
```
返回的每一列 `COLUMNS` 表达式作为单独的参数传递给函数。 如果函数支持其他参数,您也可以将其他参数传递给函数。 使用函数时要小心。 如果函数不支持您传递给它的参数数ClickHouse将引发异常。
返回的每一列 `COLUMNS` 表达式作为单独的参数传递给函数。 如果函数支持其他参数,您也可以将其他参数传递给函数。 使用函数时要小心如果函数不支持传递给它的参数ClickHouse将抛出异常。
例如:
@ -111,41 +109,41 @@ Received exception from server (version 19.14.1):
Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of arguments for function plus doesn't match: passed 3, should be 2.
```
在这个例子中, `COLUMNS('a')` 返回两列: `aa``ab`. `COLUMNS('c')` 返回 `bc` 列。 该 `+` 运算符不能应用于3个参数因此ClickHouse引发一个带有相关消息的异常。
例子中, `COLUMNS('a')` 返回两列: `aa``ab`. `COLUMNS('c')` 返回 `bc` 列。 该 `+` 运算符不能应用于3个参数因此ClickHouse抛出一个带有相关消息的异常。
匹配的列 `COLUMNS` 表达式可以具有不同的数据类型。 如果 `COLUMNS` 不匹配任何列,并且是唯一的表达式 `SELECT`ClickHouse抛出异常。
匹配的列 `COLUMNS` 表达式可以具有不同的数据类型。 如果 `COLUMNS` 不匹配任何列,并且是`SELECT` 唯一的表达式ClickHouse则抛出异常。
### 星号 {#asterisk}
您可以在查询的任何部分而不是表达式中添加星号。 分析查询时,星号将展开为所有表列的列表(不包括 `MATERIALIZED``ALIAS` 列)。 只有少数情况下使用星号是合理的:
您可以在查询的任何部分使用星号替代表达式。进行查询分析、时,星号将展开为所有表的列(不包括 `MATERIALIZED``ALIAS` 列)。 只有少数情况下使用星号是合理的:
- 创建转储时。
- 创建转储时。
- 对于只包含几列的表,例如系统表。
- 获取有关表中哪些列的信息。 在这种情况下,设置 `LIMIT 1`. 但最好使用 `DESC TABLE` 查询。
- 当对少量柱进行强过滤时,使用 `PREWHERE`.
- 获取表中列的信息。 在这种情况下,设置 `LIMIT 1`. 但最好使用 `DESC TABLE` 查询。
- 当对少量列使用 `PREWHERE` 进行强过滤时。
- 在子查询中(因为外部查询不需要的列从子查询中排除)。
在所有其他情况下我们不建议使用星号因为它只给你一个列DBMS的缺点而不是优点。 换句话说,不建议使用星号。
### 极端值 {#extreme-values}
结果之外,还可以获取结果列的最小值和最大值。 要做到这一点,设置 **极端** 设置为1。 最小值和最大值是针对数字类型、日期和带有时间的日期计算的。 对于其他列,默认值为输出
除结果之外,还可以获取结果列的最小值和最大值。 要做到这一点,设置 **extremes** 设置为1。 最小值和最大值是针对数字类型、日期和带有时间的日期计算的。 对于其他类型列,输出默认值。
An extra two rows are calculated the minimums and maximums, respectively. These extra two rows are output in `JSON*`, `TabSeparated*`,和 `Pretty*` [格式](../../../interfaces/formats.md),与其他行分开。 它们不是其他格式的输出。
分别的额外计算两行 最小值和最大值。 这额外的两行采用输出格式为 `JSON*`, `TabSeparated*`,和 `Pretty*` [formats](../../../interfaces/formats.md),与其他行分开。 它们不以其他格式输出。
`JSON*` 格式时,极端值在一个单独的输出 extremes 场。 在 `TabSeparated*` 格式中,该行来的主要结果之后,和之后 totals 如果存在。 它前面有一个空行(在其他数据之后)。 在 `Pretty*` 格式中,该行被输出为一个单独的表之后的主结果,和之后 `totals` 如果存在
`JSON*` 格式时,极端值单独的输出在 extremes 字段。 为 `TabSeparated*` 格式时,此行来的主要结果集后,然后显示 totals 字段。 它前面有一个空行(在其他数据之后)。 在 `Pretty*` 格式时,该行在主结果之后输出为一个单独的表,然后显示 totals 字段
值计算之前的行 `LIMIT`,但之后 `LIMIT BY`. 但是,使用时 `LIMIT offset, size`,之前的行 `offset` 都包含在 `extremes`. 在流请求中,结果还可能包括少量通过的行 `LIMIT`.
端值在 `LIMIT` 之前被计算,但在 `LIMIT BY` 之后被计算. 然而,使用 `LIMIT offset, size` `offset` 之前的行都包含在 `extremes`. 在流请求中,结果还可能包括少量通过 `LIMIT` 过滤的行.
### 注 {#notes}
### 注 {#notes}
您可以使用同义词 (`AS` 别名)在查询的任何部分
您可以在查询的任何部分使用同义词 (`AS` 别名)。
`GROUP BY``ORDER BY` 子句不支持位置参数。 这与MySQL相矛盾但符合标准SQL。 例如, `GROUP BY 1, 2` will be interpreted as grouping by constants (i.e. aggregation of all rows into one).
`GROUP BY``ORDER BY` 子句不支持位置参数。 这与MySQL相矛盾但符合标准SQL。 例如, `GROUP BY 1, 2` 将被理解为根据常量分组 (i.e. aggregation of all rows into one).
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
如果查询省略 `DISTINCT`, `GROUP BY` `ORDER BY` 条款和 `IN``JOIN` 子查询查询将被完全流处理使用O(1)量的RAM。 否则,如果未指定适当的限制则查询可能会消耗大量RAM:
如果查询省略 `DISTINCT`, `GROUP BY` `ORDER BY` `IN` `JOIN` 子查询查询将被完全流处理使用O(1)量的RAM。 若未指定适当的限制则查询可能会消耗大量RAM:
- `max_memory_usage`
- `max_rows_to_group_by`

View File

@ -1,15 +1,13 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: INTO OUTFILE
---
# INTO OUTFILE条款 {#into-outfile-clause}
# INTO OUTFILE 子句 {#into-outfile-clause}
添加 `INTO OUTFILE filename` 子句其中filename是字符串文字 `SELECT query` 将其输出重定向到客户端上的指定文件。
添加 `INTO OUTFILE filename` 子句其中filename是字符串 `SELECT query` 将其输出重定向到客户端上的指定文件。
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
- 此功能是在可用 [命令行客户端](../../../interfaces/cli.md) 和 [ツ环板-ョツ嘉ッツ偲](../../../operations/utilities/clickhouse-local.md). 因此通过发送查询 [HTTP接口](../../../interfaces/http.md) 都会失败
- 此功能是在可用 [命令行客户端](../../../interfaces/cli.md) 和 [clickhouse-local](../../../operations/utilities/clickhouse-local.md). 因此通过 [HTTP接口](../../../interfaces/http.md) 发送查询将会失败。
- 如果具有相同文件名的文件已经存在,则查询将失败。
- 默认值 [输出格式](../../../interfaces/formats.md) 是 `TabSeparated` (就像在命令行客户端批处理模式中一样)。

View File

@ -1,6 +1,4 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: JOIN
---
@ -29,7 +27,7 @@ FROM <left_table>
- `FULL OUTER JOIN`,除了匹配的行之外,还会返回两个表中的非匹配行。
- `CROSS JOIN`,产生整个表的笛卡尔积, “join keys” 是 **不** 指定。
`JOIN` 没有指定类型暗 `INNER`. 关键字 `OUTER` 可以安全地省略。 替代语法 `CROSS JOIN` 在指定多个表 [FROM条款](../../../sql-reference/statements/select/from.md) 用逗号分隔。
`JOIN` 没有指定类型暗 `INNER`. 关键字 `OUTER` 可以安全地省略。 替代语法 `CROSS JOIN` 在指定多个表 [FROM](../../../sql-reference/statements/select/from.md) 用逗号分隔。
ClickHouse中提供的其他联接类型:
@ -53,7 +51,7 @@ ClickHouse中提供的其他联接类型:
- 必须包含有序序列。
- 可以是以下类型之一: [Int*UInt*](../../../sql-reference/data-types/int-uint.md), [浮动\*](../../../sql-reference/data-types/float.md), [日期](../../../sql-reference/data-types/date.md), [日期时间](../../../sql-reference/data-types/datetime.md), [十进制\*](../../../sql-reference/data-types/decimal.md).
- 不能是唯一的列 `JOIN` 条款
- 不能是唯一的列 `JOIN`
语法 `ASOF JOIN ... ON`:
@ -150,7 +148,7 @@ USING (equi_column1, ... equi_columnN, asof_column)
当任何这些限制达到ClickHouse作为 [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) 设置指示。
## 例 {#examples}
## 例 {#examples}
示例:

View File

@ -1,12 +1,10 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: LIMIT BY
---
# 限制条款 {#limit-by-clause}
# LIMIT BY子句 {#limit-by-clause}
与查询 `LIMIT n BY expressions` 子句选择第一个 `n` 每个不同值的行 `expressions`. 的关键 `LIMIT BY` 可以包含任意数量的 [表达式](../../../sql-reference/syntax.md#syntax-expressions).
与查询 `LIMIT n BY expressions` 子句选择第一个 `n` 每个不同值的行 `expressions`. `LIMIT BY` 可以包含任意数量的 [表达式](../../../sql-reference/syntax.md#syntax-expressions).
ClickHouse支持以下语法变体:
@ -20,7 +18,7 @@ ClickHouse支持以下语法变体:
## 例 {#examples}
表:
表:
``` sql
CREATE TABLE limit_by(id Int, val Int) ENGINE = Memory;

View File

@ -1,15 +1,62 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: LIMIT
---
# 限制条款 {#limit-clause}
# LIMIT {#limit-clause}
`LIMIT m` 允许选择第一个 `m` 结果中的行。
`LIMIT m` 允许选择结果中起始的 `m` 行。
`LIMIT n, m` 允许选择 `m` 跳过第一个结果后的行 `n` 行。 `LIMIT m OFFSET n` 语法是等效的。
`LIMIT n, m` 允许选择 `m` 跳过第一个结果后的行 `n` 行。 `LIMIT m OFFSET n` 语法是等效的。
`n``m` 必须是非负整数。
如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。
## LIMIT … WITH TIES 修饰符 {#limit-with-ties}
如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`.
此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用.
例如以下查询:
``` sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5
```
返回
``` text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
└───┘
```
单子执行了 `WITH TIES` 修饰符后
``` sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5 WITH TIES
```
则返回了以下的数据行
``` text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
└───┘
```
cause row number 6 have same value “2” for field `n` as row number 5

View File

@ -1,12 +1,10 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: ORDER BY
---
# 按条款订购 {#select-order-by}
# ORDER BY {#select-order-by}
`ORDER BY` 子句包含一个表达式列表,每个表达式都可以用 `DESC` (降序)或 `ASC` (升序)修饰符确定排序方向。 如果未指定方向, `ASC` 假设,所以它通常被省略。 排序方向适用于单个表达式,而不适用于整个列表。 示例: `ORDER BY Visits DESC, SearchPhrase`
`ORDER BY` 子句包含一个表达式列表,每个表达式都可以用 `DESC` (降序)或 `ASC` (升序)修饰符确定排序方向。 如果未指定方向, 默认是 `ASC` ,所以它通常被省略。 排序方向适用于单个表达式,而不适用于整个列表。 示例: `ORDER BY Visits DESC, SearchPhrase`
对于排序表达式列表具有相同值的行以任意顺序输出,也可以是非确定性的(每次都不同)。
如果省略ORDER BY子句则行的顺序也是未定义的并且可能也是非确定性的。
@ -62,12 +60,145 @@ toc_title: ORDER BY
我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
更少的RAM使用如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。
如果没有足够的RAM则可以在外部存储器中执行排序在磁盘上创建临时文件。 使用设置 `max_bytes_before_external_sort` 为此目的。 如果将其设置为0默认值则禁用外部排序。 如果启用,则当要排序的数据量达到指定的字节数时,将对收集的数据进行排序并转储到临时文件中。 读取所有数据后,将合并所有已排序的文件并输出结果。 文件被写入到 `/var/lib/clickhouse/tmp/` 目录中的配置(默认情况下,但你可以使用 `tmp_path` 参数来更改此设置)。
运行查询可能占用的内存比 `max_bytes_before_external_sort`. 因此,此设置的值必须大大小于 `max_memory_usage`. 例如如果您的服务器有128GB的RAM并且您需要运行单个查询请设置 `max_memory_usage` 到100GB`max_bytes_before_external_sort` 至80GB。
运行查询可能占用的内存比 `max_bytes_before_external_sort`. 因此,此设置的值必须大大小于 `max_memory_usage`. 例如如果您的服务器有128GB的RAM并且您需要运行单个查询请设置 `max_memory_usage` 到100GB`max_bytes_before_external_sort` 至80GB。
外部排序的工作效率远远低于在RAM中进行排序。
## ORDER BY Expr WITH FILL Modifier {#orderby-with-fill}
此修饰符可以与 [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用.
可以在`ORDER BY expr`之后用可选的`FROM expr``TO expr`和`STEP expr`参数来设置`WITH FILL`修饰符。
所有`expr`列的缺失值将被顺序填充,而其他列将被填充为默认值。
使用以下语法填充多列在ORDER BY部分的每个字段名称后添加带有可选参数的WITH FILL修饰符。
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` 仅适用于具有数字(所有类型的浮点,小数,整数)或日期/日期时间类型的字段。
当未定义 `FROM const_expr` 填充顺序时,则使用 `ORDER BY` 中的最小 `expr` 字段值。
如果未定义 `TO const_expr` 填充顺序,则使用 `ORDER BY` 中的最大`expr`字段值。
当定义了 `STEP const_numeric_expr` 时,对于数字类型,`const_numeric_expr` 将 `as is` 解释为 `days` 作为日期类型,将 `seconds` 解释为DateTime类型。
如果省略了 `STEP const_numeric_expr`,则填充顺序使用 `1.0` 表示数字类型,`1 day`表示日期类型,`1 second` 表示日期时间类型。
例如下面的查询:
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
```
返回
``` text
┌─n─┬─source───┐
│ 1 │ original │
│ 4 │ original │
│ 7 │ original │
└───┴──────────┘
```
但是如果配置了 `WITH FILL` 修饰符
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```
返回
``` text
┌───n─┬─source───┐
│ 0 │ │
│ 0.5 │ │
│ 1 │ original │
│ 1.5 │ │
│ 2 │ │
│ 2.5 │ │
│ 3 │ │
│ 3.5 │ │
│ 4 │ original │
│ 4.5 │ │
│ 5 │ │
│ 5.5 │ │
│ 7 │ original │
└─────┴──────────┘
```
For the case when we have multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in `ORDER BY` clause.
对于我们有多个字段 `ORDER BY field2 WITH FILL, field1 WITH FILL ` 的情况,填充顺序将遵循` ORDER BY`子句中字段的顺序。
示例:
``` sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d2 WITH FILL,
d1 WITH FILL STEP 5;
```
返回
``` text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-01 │ 1970-01-03 │ │
│ 1970-01-01 │ 1970-01-04 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-01-01 │ 1970-01-06 │ │
│ 1970-01-01 │ 1970-01-07 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
字段 `d1` 没有填充并使用默认值,因为我们没有 `d2` 值的重复值,并且无法正确计算 `d1` 的顺序。
以下查询中`ORDER BY` 中的字段将被更改
``` sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP 5,
d2 WITH FILL;
```
返回
``` text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-16 │ 1970-01-01 │ │
│ 1970-01-21 │ 1970-01-01 │ │
│ 1970-01-26 │ 1970-01-01 │ │
│ 1970-01-31 │ 1970-01-01 │ │
│ 1970-02-05 │ 1970-01-01 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-15 │ 1970-01-01 │ │
│ 1970-02-20 │ 1970-01-01 │ │
│ 1970-02-25 │ 1970-01-01 │ │
│ 1970-03-02 │ 1970-01-01 │ │
│ 1970-03-07 │ 1970-01-01 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```

View File

@ -1,18 +1,16 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: PREWHERE
---
# PREWHERE条款 {#prewhere-clause}
# PREWHERE 子句 {#prewhere-clause}
Prewhere是更有效地应用过滤的优化。 默认情况下,即使在 `PREWHERE` 子句未显式指定。 它的工作原理是自动移动的一部分 [WHERE](../../../sql-reference/statements/select/where.md) 条件到prewhere阶段。 的作用 `PREWHERE` 子句只是控制这个优化,如果你认为你知道如何做得比默认情况下更好。
Prewhere是更有效地进行过滤的优化。 默认情况下,即使在 `PREWHERE` 子句未显式指定。 它也会自动移动 [WHERE](../../../sql-reference/statements/select/where.md) 条件到prewhere阶段。 `PREWHERE` 子句只是控制这个优化,如果你认为你知道如何做得比默认情况下更好才去控制它
使用prewhere优化首先只读取执行prewhere表达式所需的列。 然后读取运行其余查询所需的其他列但只读取prewhere表达式所在的那些块 “true” 至少对于一些行。 如果有很多块其中prewhere表达式是 “false” 对于所有行和prewhere需要比查询的其他部分更少的列这通常允许从磁盘读取更少的数据以执行查询。
## 手动控制Prewhere {#controlling-prewhere-manually}
条款具有相同的含义 `WHERE` 条款 区别在于从表中读取数据。 当手动控制 `PREWHERE` 对于查询中的少数列使用的过滤条件,但这些过滤条件提供了强大的数据过滤。 这减少了要读取的数据量。
子句具有与 `WHERE` 相同的含义,区别在于从表中读取数据。 当手动控制 `PREWHERE` 对于查询中的少数列使用的过滤条件,但这些过滤条件提供了强大的数据过滤。 这减少了要读取的数据量。
查询可以同时指定 `PREWHERE``WHERE`. 在这种情况下, `PREWHERE` 先于 `WHERE`.
@ -20,4 +18,4 @@ Prewhere是更有效地应用过滤的优化。 默认情况下,即使在 `PRE
## 限制 {#limitations}
`PREWHERE` 只有从表支持 `*MergeTree` 家人
`PREWHERE` 只有支持 `*MergeTree` 族系列引擎的表。

View File

@ -1,37 +1,35 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: SAMPLE
---
# 示例子句 {#select-sample-clause}
# 采样子句 {#select-sample-clause}
`SAMPLE` 子句允许近似 `SELECT` 查询处理。
`SAMPLE` 子句允许近似 `SELECT` 查询处理。
启用数据采样时,不会对所有数据执行查询,而只对特定部分数据(样本)执行查询。 例如如果您需要计算所有访问的统计信息只需对所有访问的1/10分数执行查询然后将结果乘以10即可。
近似查询处理在以下情况下可能很有用:
- 当你有严格的时间requirements如\<100ms但你不能证明额外的硬件资源来满足他们的成本。
- 当你有严格的时间需求(如\<100ms但你不能通过额外的硬件资源来满足他们的成本。
- 当您的原始数据不准确时,所以近似不会明显降低质量。
- 业务需求的目标是近似结果(为了成本效益,或者向高级用户推销确切结果)。
!!! note "注"
您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 家庭,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) ,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
下面列出了数据采样的功能:
- 数据采样是一种确定性机制。 同样的结果 `SELECT .. SAMPLE` 查询始终是相同的。
- 对于不同的表,采样工作始终如一。 对于具有单个采样键的表,具有相同系数的采样总是选择相同的可能数据子集。 例如用户Id的示例采用来自不同表的所有可能的用户Id的相同子集的行。 这意味着您可以在子查询中使用示例 [IN](../../../sql-reference/operators/in.md) 条款 此外,您可以使用 [JOIN](../../../sql-reference/statements/select/join.md) 条款
- 对于不同的表,采样工作始终如一。 对于具有单个采样键的表,具有相同系数的采样总是选择相同的可能数据子集。 例如用户Id的示例采用来自不同表的所有可能的用户Id的相同子集的行。 这意味着您可以在子查询中使用采样 [IN](../../../sql-reference/operators/in.md) 此外,您可以使用 [JOIN](../../../sql-reference/statements/select/join.md)
- 采样允许从磁盘读取更少的数据。 请注意,您必须正确指定采样键。 有关详细信息,请参阅 [创建MergeTree表](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table).
`SAMPLE` 子句支持以下语法:
| SAMPLE Clause Syntax | 产品描述 |
|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `SAMPLE k` | 这里 `k` 是从0到1的数字。</br>查询执行于 `k` 数据的分数。 例如, `SAMPLE 0.1` 对10%的数据运行查询。 [碌莽禄more拢more](#select-sample-k) |
| `SAMPLE n` | 这里 `n` 是足够大的整数。</br>该查询是在至少一个样本上执行的 `n` 行(但不超过这个)。 例如, `SAMPLE 10000000` 在至少10,000,000行上运行查询。 [碌莽禄more拢more](#select-sample-n) |
| `SAMPLE k OFFSET m` | 这里 `k``m` 是从0到1的数字。</br>查询在以下示例上执行 `k` 数据的分数。 用于采样的数据由以下偏移 `m` 分数。 [碌莽禄more拢more](#select-sample-offset) |
| `SAMPLE k` | 这里 `k` 是从0到1的数字。</br>查询执行于 `k` 数据的分数。 例如, `SAMPLE 0.1` 对10%的数据运行查询。 [Read more](#select-sample-k) |
| `SAMPLE n` | 这里 `n` 是足够大的整数。</br>该查询是在至少一个样本上执行的 `n` 行(但不超过这个)。 例如, `SAMPLE 10000000` 在至少10,000,000行上运行查询。 [Read more](#select-sample-n) |
| `SAMPLE k OFFSET m` | 这里 `k``m` 是从0到1的数字。</br>查询在以下示例上执行 `k` 数据的分数。 用于采样的数据由以下偏移 `m` 分数。 [Read more](#select-sample-offset) |
## SAMPLE K {#select-sample-k}

View File

@ -1,12 +1,10 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: UNION ALL
---
# UNION ALL条款 {#union-all-clause}
# UNION ALL子句 {#union-all-clause}
您可以使用 `UNION ALL` 结合任意数量的 `SELECT` 通过扩展其结果进行查询。 示例:
你可以使用 `UNION ALL` 结合任意数量的 `SELECT` 来扩展其结果。 示例:
``` sql
SELECT CounterID, 1 AS table, toInt64(count()) AS c
@ -25,12 +23,12 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c
对联合执行类型转换。 例如,如果合并的两个查询具有相同的字段与非-`Nullable` 和 `Nullable` 从兼容类型的类型,由此产生的 `UNION ALL` 有一个 `Nullable` 类型字段。
属于以下部分的查询 `UNION ALL` 不能用圆括号括起来。 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 和 [LIMIT](../../../sql-reference/statements/select/limit.md) 应用于单独的查询,而不是最终结果。 如果您需要将转换应用于最终结果,则可以将所有查询 `UNION ALL` 在子查询中 [FROM](../../../sql-reference/statements/select/from.md) 条款
属于以下部分的查询 `UNION ALL` 不能用圆括号括起来。 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 和 [LIMIT](../../../sql-reference/statements/select/limit.md) 应用于单独的查询,而不是最终结果。 如果您需要将转换应用于最终结果,则可以将所有查询 `UNION ALL` 在子查询中 [FROM](../../../sql-reference/statements/select/from.md) 子句。
## 限制 {#limitations}
只有 `UNION ALL` 支持。 定期的 `UNION` (`UNION DISTINCT`)不支持。 如果你需要 `UNION DISTINCT`,你可以写 `SELECT DISTINCT` 包含 `UNION ALL`.
只有 `UNION ALL` 支持。 `UNION` (`UNION DISTINCT`)不支持。 如果你需要 `UNION DISTINCT`,你可以写 `SELECT DISTINCT` 子查询中包含 `UNION ALL`.
## 实细节 {#implementation-details}
## 实细节 {#implementation-details}
属于以下部分的查询 `UNION ALL` 可以同时运行,并且它们的结果可以混合在一起。
属于 `UNION ALL` 的查询可以同时运行,并且它们的结果可以混合在一起。

View File

@ -1,16 +1,14 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: WHERE
---
# WHERE条款 {#select-where}
# WHERE {#select-where}
`WHERE` 子句允许过滤来自 [FROM](../../../sql-reference/statements/select/from.md) 的条款 `SELECT`.
`WHERE` 子句允许过滤从 [FROM](../../../sql-reference/statements/select/from.md) 子句 `SELECT`.
如果有一个 `WHERE` 子句,它必须包含一个表达式与 `UInt8` 类型。 这通常是一个带有比较和逻辑运算符的表达式。 此表达式计算结果为0的行将从进一步的转换或结果中解释出来。
`WHERE` 如果基础表引擎支持,则根据使用索引和分区修剪的能力评估expression
`WHERE` 如果基础表引擎支持,则根据使用索引和分区修剪的能力评估表达式
!!! note "注"
有一个叫做过滤优化 [去哪里](../../../sql-reference/statements/select/prewhere.md).
有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西.

View File

@ -1,10 +1,8 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_title: WITH
---
# WITH条款 {#with-clause}
# WITH子句 {#with-clause}
本节提供对公共表表达式的支持 ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)),所以结果 `WITH` 子句可以在其余部分中使用 `SELECT` 查询。

View File

@ -621,6 +621,22 @@ namespace
fs::remove(pid_file);
}
}
else
{
/// Create a directory for pid file.
/// It's created by "install" but we also support cases when ClickHouse is already installed different way.
fs::path pid_path = pid_file;
pid_path.remove_filename();
fs::create_directories(pid_path);
/// All users are allowed to read pid file (for clickhouse status command).
fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace);
{
std::string command = fmt::format("chown --recursive {} '{}'", user, pid_path.string());
fmt::print(" {}\n", command);
executeScript(command);
}
}
std::string command = fmt::format("{} --config-file {} --pid-file {} --daemon",
executable.string(), config.string(), pid_file.string());
@ -655,6 +671,28 @@ namespace
if (try_num == num_tries)
{
fmt::print("Cannot start server. You can execute {} without --daemon option to run manually.\n", command);
fs::path log_path;
{
ConfigProcessor processor(config.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false);
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
if (configuration->has("logger.log"))
log_path = fs::path(configuration->getString("logger.log")).remove_filename();
}
if (log_path.empty())
{
fmt::print("Cannot obtain path to logs (logger.log) from config file {}.\n", config.string());
}
else
{
fs::path stderr_path = log_path;
stderr_path.replace_filename("stderr.log");
fmt::print("Look for logs at {} and for {}.\n", log_path.string(), stderr_path.string());
}
return 3;
}

View File

@ -128,7 +128,6 @@ namespace ErrorCodes
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
@ -215,30 +214,6 @@ void Server::defineOptions(Poco::Util::OptionSet & options)
}
/// Check that there is no user-level settings at the top level in config.
/// This is a common source of mistake (user don't know where to write user-level setting).
void checkForUserSettingsAtTopLevel(const Poco::Util::AbstractConfiguration & config, const std::string & path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))
return;
Settings settings;
for (const auto & setting : settings.all())
{
const auto & name = setting.getName();
if (config.has(name))
{
throw Exception(fmt::format("A setting '{}' appeared at top level in config {}."
" But it is user-level setting that should be located in users.xml inside <profiles> section for specific profile."
" You can add it to <profiles><default> if you want to change default value of this setting."
" You can also disable the check - specify <skip_check_for_incorrect_settings>1</skip_check_for_incorrect_settings>"
" in the main configuration file.",
name, path),
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
}
}
void checkForUsersNotInMainConfig(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_path,
@ -319,7 +294,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
checkForUserSettingsAtTopLevel(config(), config_path);
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
const auto memory_amount = getMemoryAmount();
@ -538,7 +513,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
main_config_zk_changed_event,
[&](ConfigurationPtr config)
{
checkForUserSettingsAtTopLevel(*config, config_path);
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
// in a lot of places. For now, disable updating log configuration without server restart.
@ -559,48 +534,20 @@ int Server::main(const std::vector<std::string> & /*args*/)
},
/* already_loaded = */ true);
/// Initialize users config reloader.
std::string users_config_path = config().getString("users_config", config_path);
/// If path to users' config isn't absolute, try guess its root (current) dir.
/// At first, try to find it in dir of main config, after will use current dir.
if (users_config_path.empty() || users_config_path[0] != '/')
{
std::string config_dir = Poco::Path(config_path).parent().toString();
if (Poco::File(config_dir + users_config_path).exists())
users_config_path = config_dir + users_config_path;
}
if (users_config_path != config_path)
checkForUsersNotInMainConfig(config(), config_path, users_config_path, log);
auto & access_control = global_context->getAccessControlManager();
if (config().has("custom_settings_prefixes"))
global_context->getAccessControlManager().setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
auto users_config_reloader = std::make_unique<ConfigReloader>(
users_config_path,
include_from_path,
config().getString("path", ""),
zkutil::ZooKeeperNodeCache([&] { return global_context->getZooKeeper(); }),
std::make_shared<Poco::Event>(),
[&](ConfigurationPtr config)
{
global_context->setUsersConfig(config);
checkForUserSettingsAtTopLevel(*config, users_config_path);
},
/* already_loaded = */ false);
/// Initialize access storages.
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
/// Reload config in SYSTEM RELOAD CONFIG query.
global_context->setConfigReloadCallback([&]()
{
main_config_reloader->reload();
users_config_reloader->reload();
access_control.reloadUsersConfigs();
});
/// Sets a local directory storing information about access control.
std::string access_control_local_path = config().getString("access_control_path", "");
if (!access_control_local_path.empty())
global_context->getAccessControlManager().setLocalDirectory(access_control_local_path);
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0));
@ -1069,7 +1016,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
buildLoggers(config(), logger());
main_config_reloader->start();
users_config_reloader->start();
access_control.startPeriodicReloadingUsersConfigs();
if (dns_cache_updater)
dns_cache_updater->start();
@ -1128,7 +1075,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
dns_cache_updater.reset();
main_config_reloader.reset();
users_config_reloader.reset();
if (current_connections)
{

View File

@ -14,6 +14,7 @@
#include <common/find_symbols.h>
#include <Poco/ExpireCache.h>
#include <boost/algorithm/string/join.hpp>
#include <filesystem>
#include <mutex>
@ -21,25 +22,32 @@ namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int UNKNOWN_SETTING;
}
namespace
{
std::vector<std::unique_ptr<IAccessStorage>> createStorages()
void checkForUsersNotInMainConfig(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_path,
const std::string & users_config_path,
Poco::Logger * log)
{
std::vector<std::unique_ptr<IAccessStorage>> list;
list.emplace_back(std::make_unique<DiskAccessStorage>());
list.emplace_back(std::make_unique<UsersConfigAccessStorage>());
if (config.getBool("skip_check_for_incorrect_settings", false))
return;
#if 0 /// Memory access storage is disabled.
list.emplace_back(std::make_unique<MemoryAccessStorage>());
#endif
return list;
if (config.has("users") || config.has("profiles") || config.has("quotas"))
{
/// We cannot throw exception here, because we have support for obsolete 'conf.d' directory
/// (that does not correspond to config.d or users.d) but substitute configuration to both of them.
LOG_ERROR(log, "The <users>, <profiles> and <quotas> elements should be located in users config file: {} not in main config {}."
" Also note that you should place configuration changes to the appropriate *.d directory like 'users.d'.",
users_config_path, config_path);
}
}
constexpr size_t DISK_ACCESS_STORAGE_INDEX = 0;
constexpr size_t USERS_CONFIG_ACCESS_STORAGE_INDEX = 1;
}
@ -114,7 +122,7 @@ private:
AccessControlManager::AccessControlManager()
: MultipleAccessStorage(createStorages()),
: MultipleAccessStorage("user directories"),
context_access_cache(std::make_unique<ContextAccessCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this)),
row_policy_cache(std::make_unique<RowPolicyCache>(*this)),
@ -123,20 +131,191 @@ AccessControlManager::AccessControlManager()
external_authenticators(std::make_unique<ExternalAuthenticators>()),
custom_settings_prefixes(std::make_unique<CustomSettingsPrefixes>())
{
/// Allow UsersConfigAccessStorage to check the names of settings which it will read from users.xml.
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto & users_config_access_storage = dynamic_cast<UsersConfigAccessStorage &>(getStorageByIndex(USERS_CONFIG_ACCESS_STORAGE_INDEX));
users_config_access_storage.setCheckSettingNameFunction(check_setting_name_function);
}
AccessControlManager::~AccessControlManager() = default;
void AccessControlManager::setLocalDirectory(const String & directory_path)
void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
auto & disk_access_storage = dynamic_cast<DiskAccessStorage &>(getStorageByIndex(DISK_ACCESS_STORAGE_INDEX));
disk_access_storage.setDirectory(directory_path);
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
{
if (auto users_config_storage = typeid_cast<std::shared_ptr<UsersConfigAccessStorage>>(storage))
{
users_config_storage->setConfig(users_config_);
return;
}
}
addUsersConfigStorage(users_config_);
}
void AccessControlManager::addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_)
{
addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_);
}
void AccessControlManager::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->setConfig(users_config_);
addStorage(new_storage);
}
void AccessControlManager::addUsersConfigStorage(
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
addUsersConfigStorage(
UsersConfigAccessStorage::STORAGE_TYPE, users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
}
void AccessControlManager::addUsersConfigStorage(
const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
}
void AccessControlManager::reloadUsersConfigs()
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
{
if (auto users_config_storage = typeid_cast<std::shared_ptr<UsersConfigAccessStorage>>(storage))
users_config_storage->reload();
}
}
void AccessControlManager::startPeriodicReloadingUsersConfigs()
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
{
if (auto users_config_storage = typeid_cast<std::shared_ptr<UsersConfigAccessStorage>>(storage))
users_config_storage->startPeriodicReloading();
}
}
void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_)
{
addStorage(std::make_shared<DiskAccessStorage>(directory_, readonly_));
}
void AccessControlManager::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_)
{
addStorage(std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_));
}
void AccessControlManager::addMemoryStorage(const String & storage_name_)
{
addStorage(std::make_shared<MemoryAccessStorage>(storage_name_));
}
void AccessControlManager::addStoragesFromUserDirectoriesConfig(
const Poco::Util::AbstractConfiguration & config,
const String & key,
const String & config_dir,
const String & dbms_dir,
const String & include_from_path,
const zkutil::GetZooKeeper & get_zookeeper_function)
{
Strings keys_in_user_directories;
config.keys(key, keys_in_user_directories);
for (const String & key_in_user_directories : keys_in_user_directories)
{
String prefix = key + "." + key_in_user_directories;
String type = key_in_user_directories;
if (size_t bracket_pos = type.find('['); bracket_pos != String::npos)
type.resize(bracket_pos);
if ((type == "users_xml") || (type == "users_config"))
type = UsersConfigAccessStorage::STORAGE_TYPE;
else if ((type == "local") || (type == "local_directory"))
type = DiskAccessStorage::STORAGE_TYPE;
String name = config.getString(prefix + ".name", type);
if (type == MemoryAccessStorage::STORAGE_TYPE)
{
addMemoryStorage(name);
}
else if (type == UsersConfigAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
if (std::filesystem::path{path}.is_relative() && std::filesystem::exists(config_dir + path))
path = config_dir + path;
addUsersConfigStorage(name, path, include_from_path, dbms_dir, get_zookeeper_function);
}
else if (type == DiskAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
bool readonly = config.getBool(prefix + ".readonly", false);
addDiskStorage(name, path, readonly);
}
else
throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
}
void AccessControlManager::addStoragesFromMainConfig(
const Poco::Util::AbstractConfiguration & config,
const String & config_path,
const zkutil::GetZooKeeper & get_zookeeper_function)
{
String config_dir = std::filesystem::path{config_path}.remove_filename().string();
String dbms_dir = config.getString("path", DBMS_DEFAULT_PATH);
String include_from_path = config.getString("include_from", "/etc/metrika.xml");
if (config.has("user_directories"))
{
if (config.has("users_config"))
LOG_WARNING(getLogger(), "<user_directories> is specified, the path from <users_config> won't be used: " + config.getString("users_config"));
if (config.has("access_control_path"))
LOG_WARNING(getLogger(), "<access_control_path> is specified, the path from <access_control_path> won't be used: " + config.getString("access_control_path"));
addStoragesFromUserDirectoriesConfig(
config,
"user_directories",
config_dir,
dbms_dir,
include_from_path,
get_zookeeper_function);
}
else
{
/// If path to users' config isn't absolute, try guess its root (current) dir.
/// At first, try to find it in dir of main config, after will use current dir.
String users_config_path = config.getString("users_config", "");
if (users_config_path.empty())
users_config_path = config_path;
else if (std::filesystem::path{users_config_path}.is_relative() && std::filesystem::exists(config_dir + users_config_path))
users_config_path = config_dir + users_config_path;
if (users_config_path != config_path)
checkForUsersNotInMainConfig(config, config_path, users_config_path, getLogger());
addUsersConfigStorage(users_config_path, include_from_path, dbms_dir, get_zookeeper_function);
String disk_storage_dir = config.getString("access_control_path", "");
if (!disk_storage_dir.empty())
addDiskStorage(disk_storage_dir);
}
}
@ -146,13 +325,6 @@ void AccessControlManager::setExternalAuthenticatorsConfig(const Poco::Util::Abs
}
void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config)
{
auto & users_config_access_storage = dynamic_cast<UsersConfigAccessStorage &>(getStorageByIndex(USERS_CONFIG_ACCESS_STORAGE_INDEX));
users_config_access_storage.setConfiguration(users_config);
}
void AccessControlManager::setDefaultProfileName(const String & default_profile_name)
{
settings_profiles_cache->setDefaultProfileName(default_profile_name);

View File

@ -2,6 +2,7 @@
#include <Access/MultipleAccessStorage.h>
#include <Common/SettingsChanges.h>
#include <Common/ZooKeeper/Common.h>
#include <boost/container/flat_set.hpp>
#include <memory>
@ -48,9 +49,54 @@ public:
AccessControlManager();
~AccessControlManager();
void setLocalDirectory(const String & directory);
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config);
/// Parses access entities from a configuration loaded from users.xml.
/// This function add UsersConfigAccessStorage if it wasn't added before.
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
/// Adds UsersConfigAccessStorage.
void addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & storage_name_,
const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
void addUsersConfigStorage(const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
void reloadUsersConfigs();
void startPeriodicReloadingUsersConfigs();
/// Loads access entities from the directory on the local disk.
/// Use that directory to keep created users/roles/etc.
void addDiskStorage(const String & directory_, bool readonly_ = false);
void addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_ = false);
/// Adds MemoryAccessStorage which keeps access entities in memory.
void addMemoryStorage();
void addMemoryStorage(const String & storage_name_);
/// Adds storages from <users_directories> config.
void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config,
const String & key,
const String & config_dir,
const String & dbms_dir,
const String & include_from_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
/// Adds storages from the main config.
void addStoragesFromMainConfig(const Poco::Util::AbstractConfiguration & config,
const String & config_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
/// Sets the default profile's name.
/// The default profile's settings are always applied before any other profile's.
void setDefaultProfileName(const String & default_profile_name);
/// Sets prefixes which should be used for custom settings.
@ -60,6 +106,8 @@ public:
bool isSettingNameAllowed(const std::string_view & name) const;
void checkSettingNameIsAllowed(const std::string_view & name) const;
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
std::shared_ptr<const ContextAccess> getContextAccess(
const UUID & user_id,
const boost::container::flat_set<UUID> & current_roles,
@ -96,8 +144,10 @@ public:
const ExternalAuthenticators & getExternalAuthenticators() const;
private: class ContextAccessCache;
private:
class ContextAccessCache;
class CustomSettingsPrefixes;
std::unique_ptr<ContextAccessCache> context_access_cache;
std::unique_ptr<RoleCache> role_cache;
std::unique_ptr<RowPolicyCache> row_policy_cache;

View File

@ -47,7 +47,6 @@ namespace ErrorCodes
extern const int DIRECTORY_DOESNT_EXIST;
extern const int FILE_DOESNT_EXIST;
extern const int INCORRECT_ACCESS_ENTITY_DEFINITION;
extern const int LOGICAL_ERROR;
}
@ -86,7 +85,7 @@ namespace
/// Reads a file containing ATTACH queries and then parses it to build an access entity.
AccessEntityPtr readEntityFile(const std::filesystem::path & file_path)
AccessEntityPtr readEntityFile(const String & file_path)
{
/// Read the file.
ReadBufferFromFile in{file_path};
@ -119,42 +118,42 @@ namespace
if (auto * create_user_query = query->as<ASTCreateUserQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = user = std::make_unique<User>();
InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query);
}
else if (auto * create_role_query = query->as<ASTCreateRoleQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = role = std::make_unique<Role>();
InterpreterCreateRoleQuery::updateRoleFromQuery(*role, *create_role_query);
}
else if (auto * create_policy_query = query->as<ASTCreateRowPolicyQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = policy = std::make_unique<RowPolicy>();
InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(*policy, *create_policy_query);
}
else if (auto * create_quota_query = query->as<ASTCreateQuotaQuery>())
{
if (res)
throw Exception("Two access entities are attached in the same file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities are attached in the same file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = quota = std::make_unique<Quota>();
InterpreterCreateQuotaQuery::updateQuotaFromQuery(*quota, *create_quota_query);
}
else if (auto * create_profile_query = query->as<ASTCreateSettingsProfileQuery>())
{
if (res)
throw Exception("Two access entities are attached in the same file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities are attached in the same file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = profile = std::make_unique<SettingsProfile>();
InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(*profile, *create_profile_query);
}
else if (auto * grant_query = query->as<ASTGrantQuery>())
{
if (!user && !role)
throw Exception("A user or role should be attached before grant in file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("A user or role should be attached before grant in file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
if (user)
InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query);
else
@ -165,13 +164,13 @@ namespace
}
if (!res)
throw Exception("No access entities attached in file " + file_path.string(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("No access entities attached in file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
return res;
}
AccessEntityPtr tryReadEntityFile(const std::filesystem::path & file_path, Poco::Logger & log)
AccessEntityPtr tryReadEntityFile(const String & file_path, Poco::Logger & log)
{
try
{
@ -179,14 +178,14 @@ namespace
}
catch (...)
{
tryLogCurrentException(&log, "Could not parse " + file_path.string());
tryLogCurrentException(&log, "Could not parse " + file_path);
return nullptr;
}
}
/// Writes ATTACH queries for building a specified access entity to a file.
void writeEntityFile(const std::filesystem::path & file_path, const IAccessEntity & entity)
void writeEntityFile(const String & file_path, const IAccessEntity & entity)
{
/// Build list of ATTACH queries.
ASTs queries;
@ -220,14 +219,14 @@ namespace
/// Calculates the path to a file named <id>.sql for saving an access entity.
std::filesystem::path getEntityFilePath(const String & directory_path, const UUID & id)
String getEntityFilePath(const String & directory_path, const UUID & id)
{
return std::filesystem::path(directory_path).append(toString(id)).replace_extension(".sql");
return directory_path + toString(id) + ".sql";
}
/// Reads a map of name of access entity to UUID for access entities of some type from a file.
std::vector<std::pair<UUID, String>> readListFile(const std::filesystem::path & file_path)
std::vector<std::pair<UUID, String>> readListFile(const String & file_path)
{
ReadBufferFromFile in(file_path);
@ -250,7 +249,7 @@ namespace
/// Writes a map of name of access entity to UUID for access entities of some type to a file.
void writeListFile(const std::filesystem::path & file_path, const std::vector<std::pair<UUID, std::string_view>> & id_name_pairs)
void writeListFile(const String & file_path, const std::vector<std::pair<UUID, std::string_view>> & id_name_pairs)
{
WriteBufferFromFile out(file_path);
writeVarUInt(id_name_pairs.size(), out);
@ -263,20 +262,19 @@ namespace
/// Calculates the path for storing a map of name of access entity to UUID for access entities of some type.
std::filesystem::path getListFilePath(const String & directory_path, EntityType type)
String getListFilePath(const String & directory_path, EntityType type)
{
String file_name = EntityTypeInfo::get(type).plural_raw_name;
boost::to_lower(file_name);
file_name += ".list";
return std::filesystem::path(directory_path).append(file_name);
return directory_path + file_name + ".list";
}
/// Calculates the path to a temporary file which existence means that list files are corrupted
/// and need to be rebuild.
std::filesystem::path getNeedRebuildListsMarkFilePath(const String & directory_path)
String getNeedRebuildListsMarkFilePath(const String & directory_path)
{
return std::filesystem::path(directory_path).append("need_rebuild_lists.mark");
return directory_path + "need_rebuild_lists.mark";
}
@ -295,39 +293,18 @@ namespace
}
DiskAccessStorage::DiskAccessStorage()
: IAccessStorage("disk")
DiskAccessStorage::DiskAccessStorage(const String & directory_path_, bool readonly_)
: DiskAccessStorage(STORAGE_TYPE, directory_path_, readonly_)
{
}
DiskAccessStorage::~DiskAccessStorage()
{
stopListsWritingThread();
writeLists();
}
void DiskAccessStorage::setDirectory(const String & directory_path_)
{
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
initialize(directory_path_, notifications);
}
void DiskAccessStorage::initialize(const String & directory_path_, Notifications & notifications)
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_)
: IAccessStorage(storage_name_)
{
auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path_);
if (initialized)
{
if (directory_path == canonical_directory_path)
return;
throw Exception("Storage " + getStorageName() + " already initialized with another directory", ErrorCodes::LOGICAL_ERROR);
}
if (canonical_directory_path.has_filename())
canonical_directory_path += std::filesystem::path::preferred_separator;
std::error_code create_dir_error_code;
std::filesystem::create_directories(canonical_directory_path, create_dir_error_code);
@ -336,7 +313,7 @@ void DiskAccessStorage::initialize(const String & directory_path_, Notifications
throw Exception("Couldn't create directory " + canonical_directory_path.string() + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST);
directory_path = canonical_directory_path;
initialized = true;
readonly = readonly_;
bool should_rebuild_lists = std::filesystem::exists(getNeedRebuildListsMarkFilePath(directory_path));
if (!should_rebuild_lists)
@ -350,9 +327,13 @@ void DiskAccessStorage::initialize(const String & directory_path_, Notifications
rebuildLists();
writeLists();
}
}
for (const auto & [id, entry] : entries_by_id)
prepareNotifications(id, entry, false, notifications);
DiskAccessStorage::~DiskAccessStorage()
{
stopListsWritingThread();
writeLists();
}
@ -375,7 +356,7 @@ bool DiskAccessStorage::readLists()
auto file_path = getListFilePath(directory_path, type);
if (!std::filesystem::exists(file_path))
{
LOG_WARNING(getLogger(), "File {} doesn't exist", file_path.string());
LOG_WARNING(getLogger(), "File {} doesn't exist", file_path);
ok = false;
break;
}
@ -393,7 +374,7 @@ bool DiskAccessStorage::readLists()
}
catch (...)
{
tryLogCurrentException(getLogger(), "Could not read " + file_path.string());
tryLogCurrentException(getLogger(), "Could not read " + file_path);
ok = false;
break;
}
@ -428,7 +409,7 @@ bool DiskAccessStorage::writeLists()
}
catch (...)
{
tryLogCurrentException(getLogger(), "Could not write " + file_path.string());
tryLogCurrentException(getLogger(), "Could not write " + file_path);
failed_to_write_lists = true;
types_of_lists_to_write.clear();
return false;
@ -598,7 +579,7 @@ String DiskAccessStorage::readNameImpl(const UUID & id) const
bool DiskAccessStorage::canInsertImpl(const AccessEntityPtr &) const
{
return initialized;
return !readonly;
}
@ -609,7 +590,7 @@ UUID DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool repl
UUID id = generateRandomID();
std::lock_guard lock{mutex};
insertNoLock(generateRandomID(), new_entity, replace_if_exists, notifications);
insertNoLock(id, new_entity, replace_if_exists, notifications);
return id;
}
@ -618,11 +599,9 @@ void DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
{
const String & name = new_entity->getName();
EntityType type = new_entity->getType();
if (!initialized)
throw Exception(
"Cannot insert " + new_entity->outputTypeAndName() + " to storage [" + getStorageName()
+ "] because the output directory is not set",
ErrorCodes::LOGICAL_ERROR);
if (readonly)
throwReadonlyCannotInsert(type, name);
/// Check that we can insert.
auto it_by_id = entries_by_id.find(id);
@ -675,6 +654,9 @@ void DiskAccessStorage::removeNoLock(const UUID & id, Notifications & notificati
Entry & entry = it->second;
EntityType type = entry.type;
if (readonly)
throwReadonlyCannotRemove(type, entry.name);
scheduleWriteLists(type);
deleteAccessEntityOnDisk(id);
@ -703,6 +685,8 @@ void DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_
throwNotFound(id);
Entry & entry = it->second;
if (readonly)
throwReadonlyCannotUpdate(entry.type, entry.name);
if (!entry.entity)
entry.entity = readAccessEntityFromDisk(id);
auto old_entity = entry.entity;
@ -757,7 +741,7 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
{
auto file_path = getEntityFilePath(directory_path, id);
if (!std::filesystem::remove(file_path))
throw Exception("Couldn't delete " + file_path.string(), ErrorCodes::FILE_DOESNT_EXIST);
throw Exception("Couldn't delete " + file_path, ErrorCodes::FILE_DOESNT_EXIST);
}

View File

@ -11,10 +11,15 @@ namespace DB
class DiskAccessStorage : public IAccessStorage
{
public:
DiskAccessStorage();
static constexpr char STORAGE_TYPE[] = "local directory";
DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_ = false);
DiskAccessStorage(const String & directory_path_, bool readonly_ = false);
~DiskAccessStorage() override;
void setDirectory(const String & directory_path_);
const char * getStorageType() const override { return STORAGE_TYPE; }
String getStoragePath() const override { return directory_path; }
bool isStorageReadOnly() const override { return readonly; }
private:
std::optional<UUID> findImpl(EntityType type, const String & name) const override;
@ -31,7 +36,6 @@ private:
bool hasSubscriptionImpl(const UUID & id) const override;
bool hasSubscriptionImpl(EntityType type) const override;
void initialize(const String & directory_path_, Notifications & notifications);
void clear();
bool readLists();
bool writeLists();
@ -63,7 +67,7 @@ private:
void prepareNotifications(const UUID & id, const Entry & entry, bool remove, Notifications & notifications) const;
String directory_path;
bool initialized = false;
bool readonly;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(EntityType::MAX)];
boost::container::flat_set<EntityType> types_of_lists_to_write;

View File

@ -432,14 +432,14 @@ Poco::Logger * IAccessStorage::getLogger() const
void IAccessStorage::throwNotFound(const UUID & id) const
{
throw Exception(outputID(id) + " not found in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_NOT_FOUND);
throw Exception(outputID(id) + " not found in " + getStorageName(), ErrorCodes::ACCESS_ENTITY_NOT_FOUND);
}
void IAccessStorage::throwNotFound(EntityType type, const String & name) const
{
int error_code = EntityTypeInfo::get(type).not_found_error_code;
throw Exception("There is no " + outputEntityTypeAndName(type, name) + " in [" + getStorageName() + "]", error_code);
throw Exception("There is no " + outputEntityTypeAndName(type, name) + " in " + getStorageName(), error_code);
}
@ -455,7 +455,7 @@ void IAccessStorage::throwIDCollisionCannotInsert(const UUID & id, EntityType ty
{
throw Exception(
outputEntityTypeAndName(type, name) + ": cannot insert because the " + outputID(id) + " is already used by "
+ outputEntityTypeAndName(existing_type, existing_name) + " in [" + getStorageName() + "]",
+ outputEntityTypeAndName(existing_type, existing_name) + " in " + getStorageName(),
ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
}
@ -463,8 +463,8 @@ void IAccessStorage::throwIDCollisionCannotInsert(const UUID & id, EntityType ty
void IAccessStorage::throwNameCollisionCannotInsert(EntityType type, const String & name) const
{
throw Exception(
outputEntityTypeAndName(type, name) + ": cannot insert because " + outputEntityTypeAndName(type, name) + " already exists in ["
+ getStorageName() + "]",
outputEntityTypeAndName(type, name) + ": cannot insert because " + outputEntityTypeAndName(type, name) + " already exists in "
+ getStorageName(),
ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
}
@ -473,7 +473,7 @@ void IAccessStorage::throwNameCollisionCannotRename(EntityType type, const Strin
{
throw Exception(
outputEntityTypeAndName(type, old_name) + ": cannot rename to " + backQuote(new_name) + " because "
+ outputEntityTypeAndName(type, new_name) + " already exists in [" + getStorageName() + "]",
+ outputEntityTypeAndName(type, new_name) + " already exists in " + getStorageName(),
ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
}
@ -481,7 +481,7 @@ void IAccessStorage::throwNameCollisionCannotRename(EntityType type, const Strin
void IAccessStorage::throwReadonlyCannotInsert(EntityType type, const String & name) const
{
throw Exception(
"Cannot insert " + outputEntityTypeAndName(type, name) + " to [" + getStorageName() + "] because this storage is readonly",
"Cannot insert " + outputEntityTypeAndName(type, name) + " to " + getStorageName() + " because this storage is readonly",
ErrorCodes::ACCESS_STORAGE_READONLY);
}
@ -489,7 +489,7 @@ void IAccessStorage::throwReadonlyCannotInsert(EntityType type, const String & n
void IAccessStorage::throwReadonlyCannotUpdate(EntityType type, const String & name) const
{
throw Exception(
"Cannot update " + outputEntityTypeAndName(type, name) + " in [" + getStorageName() + "] because this storage is readonly",
"Cannot update " + outputEntityTypeAndName(type, name) + " in " + getStorageName() + " because this storage is readonly",
ErrorCodes::ACCESS_STORAGE_READONLY);
}
@ -497,7 +497,7 @@ void IAccessStorage::throwReadonlyCannotUpdate(EntityType type, const String & n
void IAccessStorage::throwReadonlyCannotRemove(EntityType type, const String & name) const
{
throw Exception(
"Cannot remove " + outputEntityTypeAndName(type, name) + " from [" + getStorageName() + "] because this storage is readonly",
"Cannot remove " + outputEntityTypeAndName(type, name) + " from " + getStorageName() + " because this storage is readonly",
ErrorCodes::ACCESS_STORAGE_READONLY);
}
}

View File

@ -24,6 +24,9 @@ public:
/// Returns the name of this storage.
const String & getStorageName() const { return storage_name; }
virtual const char * getStorageType() const = 0;
virtual String getStoragePath() const { return {}; }
virtual bool isStorageReadOnly() const { return false; }
using EntityType = IAccessEntity::Type;
using EntityTypeInfo = IAccessEntity::TypeInfo;

View File

@ -171,6 +171,7 @@ void MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat
if (it2 != entries_by_name.end())
throwNameCollisionCannotRename(old_entity->getType(), old_entity->getName(), new_entity->getName());
entries_by_name.erase(old_entity->getName());
entries_by_name[new_entity->getName()] = &entry;
}

View File

@ -13,7 +13,11 @@ namespace DB
class MemoryAccessStorage : public IAccessStorage
{
public:
MemoryAccessStorage(const String & storage_name_ = "memory");
static constexpr char STORAGE_TYPE[] = "memory";
MemoryAccessStorage(const String & storage_name_ = STORAGE_TYPE);
const char * getStorageType() const override { return STORAGE_TYPE; }
/// Sets all entities at once.
void setAll(const std::vector<AccessEntityPtr> & all_entities);

View File

@ -1,89 +1,116 @@
#include <Access/MultipleAccessStorage.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <ext/range.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm/find.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int ACCESS_ENTITY_FOUND_DUPLICATES;
extern const int ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND;
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
}
namespace
{
template <typename StoragePtrT>
String joinStorageNames(const std::vector<StoragePtrT> & storages)
{
String result;
for (const auto & storage : storages)
{
if (!result.empty())
result += ", ";
result += storage->getStorageName();
}
return result;
}
}
using Storage = IAccessStorage;
using StoragePtr = std::shared_ptr<Storage>;
using ConstStoragePtr = std::shared_ptr<const Storage>;
using Storages = std::vector<StoragePtr>;
MultipleAccessStorage::MultipleAccessStorage(
std::vector<std::unique_ptr<Storage>> nested_storages_)
: IAccessStorage(joinStorageNames(nested_storages_))
, nested_storages(std::move(nested_storages_))
MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_)
: IAccessStorage(storage_name_)
, nested_storages(std::make_shared<Storages>())
, ids_cache(512 /* cache size */)
{
}
std::vector<UUID> MultipleAccessStorage::findMultiple(EntityType type, const String & name) const
void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages)
{
std::vector<UUID> ids;
for (const auto & nested_storage : nested_storages)
{
auto id = nested_storage->find(type, name);
if (id)
{
std::lock_guard lock{ids_cache_mutex};
ids_cache.set(*id, std::make_shared<Storage *>(nested_storage.get()));
ids.push_back(*id);
}
}
return ids;
std::unique_lock lock{mutex};
nested_storages = std::make_shared<const Storages>(storages);
ids_cache.reset();
updateSubscriptionsToNestedStorages(lock);
}
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
{
std::unique_lock lock{mutex};
if (boost::range::find(*nested_storages, new_storage) != nested_storages->end())
return;
auto new_storages = std::make_shared<Storages>(*nested_storages);
new_storages->push_back(new_storage);
nested_storages = new_storages;
updateSubscriptionsToNestedStorages(lock);
}
void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
{
std::unique_lock lock{mutex};
auto it = boost::range::find(*nested_storages, storage_to_remove);
if (it == nested_storages->end())
return;
size_t index = it - nested_storages->begin();
auto new_storages = std::make_shared<Storages>(*nested_storages);
new_storages->erase(new_storages->begin() + index);
nested_storages = new_storages;
ids_cache.reset();
updateSubscriptionsToNestedStorages(lock);
}
std::vector<StoragePtr> MultipleAccessStorage::getStorages()
{
return *getStoragesPtr();
}
std::vector<ConstStoragePtr> MultipleAccessStorage::getStorages() const
{
auto storages = getStoragesInternal();
std::vector<ConstStoragePtr> res;
res.reserve(storages->size());
boost::range::copy(*storages, std::back_inserter(res));
return res;
}
std::shared_ptr<const Storages> MultipleAccessStorage::getStoragesPtr()
{
return getStoragesInternal();
}
std::shared_ptr<const Storages> MultipleAccessStorage::getStoragesInternal() const
{
std::lock_guard lock{mutex};
return nested_storages;
}
std::optional<UUID> MultipleAccessStorage::findImpl(EntityType type, const String & name) const
{
auto ids = findMultiple(type, name);
if (ids.empty())
return {};
if (ids.size() == 1)
return ids[0];
std::vector<const Storage *> storages_with_duplicates;
for (const auto & id : ids)
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
const auto * storage = findStorage(id);
if (storage)
storages_with_duplicates.push_back(storage);
auto id = storage->find(type, name);
if (id)
{
std::lock_guard lock{mutex};
ids_cache.set(*id, storage);
return id;
}
}
throw Exception(
"Found " + outputEntityTypeAndName(type, name) + " in " + std::to_string(ids.size())
+ " storages [" + joinStorageNames(storages_with_duplicates) + "]",
ErrorCodes::ACCESS_ENTITY_FOUND_DUPLICATES);
return {};
}
std::vector<UUID> MultipleAccessStorage::findAllImpl(EntityType type) const
{
std::vector<UUID> all_ids;
for (const auto & nested_storage : nested_storages)
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
auto ids = nested_storage->findAll(type);
auto ids = storage->findAll(type);
all_ids.insert(all_ids.end(), std::make_move_iterator(ids.begin()), std::make_move_iterator(ids.end()));
}
return all_ids;
@ -96,26 +123,24 @@ bool MultipleAccessStorage::existsImpl(const UUID & id) const
}
IAccessStorage * MultipleAccessStorage::findStorage(const UUID & id)
StoragePtr MultipleAccessStorage::findStorage(const UUID & id)
{
StoragePtr from_cache;
{
std::lock_guard lock{ids_cache_mutex};
auto from_cache = ids_cache.get(id);
if (from_cache)
{
auto * storage = *from_cache;
if (storage->exists(id))
return storage;
}
std::lock_guard lock{mutex};
from_cache = ids_cache.get(id);
}
if (from_cache && from_cache->exists(id))
return from_cache;
for (const auto & nested_storage : nested_storages)
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (nested_storage->exists(id))
if (storage->exists(id))
{
std::lock_guard lock{ids_cache_mutex};
ids_cache.set(id, std::make_shared<Storage *>(nested_storage.get()));
return nested_storage.get();
std::lock_guard lock{mutex};
ids_cache.set(id, storage);
return storage;
}
}
@ -123,52 +148,44 @@ IAccessStorage * MultipleAccessStorage::findStorage(const UUID & id)
}
const IAccessStorage * MultipleAccessStorage::findStorage(const UUID & id) const
ConstStoragePtr MultipleAccessStorage::findStorage(const UUID & id) const
{
return const_cast<MultipleAccessStorage *>(this)->findStorage(id);
}
IAccessStorage & MultipleAccessStorage::getStorage(const UUID & id)
StoragePtr MultipleAccessStorage::getStorage(const UUID & id)
{
auto * storage = findStorage(id);
auto storage = findStorage(id);
if (storage)
return *storage;
return storage;
throwNotFound(id);
}
const IAccessStorage & MultipleAccessStorage::getStorage(const UUID & id) const
ConstStoragePtr MultipleAccessStorage::getStorage(const UUID & id) const
{
return const_cast<MultipleAccessStorage *>(this)->getStorage(id);
}
void MultipleAccessStorage::addStorage(std::unique_ptr<Storage> nested_storage)
{
/// Note that IStorage::storage_name is not changed. It is ok as this method
/// is considered as a temporary solution allowing third-party Arcadia applications
/// using CH as a library to register their own access storages. Do not remove
/// this method without providing any alternative :)
nested_storages.emplace_back(std::move(nested_storage));
}
AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id) const
{
return getStorage(id).read(id);
return getStorage(id)->read(id);
}
String MultipleAccessStorage::readNameImpl(const UUID & id) const
{
return getStorage(id).readName(id);
return getStorage(id)->readName(id);
}
bool MultipleAccessStorage::canInsertImpl(const AccessEntityPtr & entity) const
{
for (const auto & nested_storage : nested_storages)
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (nested_storage->canInsert(entity))
if (storage->canInsert(entity))
return true;
}
return false;
@ -177,74 +194,202 @@ bool MultipleAccessStorage::canInsertImpl(const AccessEntityPtr & entity) const
UUID MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists)
{
IAccessStorage * nested_storage_for_insertion = nullptr;
for (const auto & nested_storage : nested_storages)
auto storages = getStoragesInternal();
std::shared_ptr<IAccessStorage> storage_for_insertion;
for (const auto & storage : *storages)
{
if (nested_storage->canInsert(entity))
if (storage->canInsert(entity) ||
storage->find(entity->getType(), entity->getName()))
{
nested_storage_for_insertion = nested_storage.get();
storage_for_insertion = storage;
break;
}
}
if (!nested_storage_for_insertion)
if (!storage_for_insertion)
throw Exception("Not found a storage to insert " + entity->outputTypeAndName(), ErrorCodes::ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND);
auto id = replace_if_exists ? nested_storage_for_insertion->insertOrReplace(entity) : nested_storage_for_insertion->insert(entity);
std::lock_guard lock{ids_cache_mutex};
ids_cache.set(id, std::make_shared<Storage *>(nested_storage_for_insertion));
auto id = replace_if_exists ? storage_for_insertion->insertOrReplace(entity) : storage_for_insertion->insert(entity);
std::lock_guard lock{mutex};
ids_cache.set(id, storage_for_insertion);
return id;
}
void MultipleAccessStorage::removeImpl(const UUID & id)
{
getStorage(id).remove(id);
getStorage(id)->remove(id);
}
void MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func)
{
getStorage(id).update(id, update_func);
auto storage_for_updating = getStorage(id);
/// If the updating involves renaming check that the renamed entity will be accessible by name.
auto storages = getStoragesInternal();
if ((storages->size() > 1) && (storages->front() != storage_for_updating))
{
auto old_entity = storage_for_updating->read(id);
auto new_entity = update_func(old_entity);
if (new_entity->getName() != old_entity->getName())
{
for (const auto & storage : *storages)
{
if (storage == storage_for_updating)
break;
if (storage->find(new_entity->getType(), new_entity->getName()))
{
throw Exception(
old_entity->outputTypeAndName() + ": cannot rename to " + backQuote(new_entity->getName()) + " because "
+ new_entity->outputTypeAndName() + " already exists in " + storage->getStorageName(),
ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
}
}
}
}
storage_for_updating->update(id, update_func);
}
ext::scope_guard MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
const auto * storage = findStorage(id);
auto storage = findStorage(id);
if (!storage)
return {};
return storage->subscribeForChanges(id, handler);
}
ext::scope_guard MultipleAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
ext::scope_guard subscriptions;
for (const auto & nested_storage : nested_storages)
subscriptions.join(nested_storage->subscribeForChanges(type, handler));
return subscriptions;
}
bool MultipleAccessStorage::hasSubscriptionImpl(const UUID & id) const
{
for (const auto & nested_storage : nested_storages)
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (nested_storage->hasSubscription(id))
if (storage->hasSubscription(id))
return true;
}
return false;
}
ext::scope_guard MultipleAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::unique_lock lock{mutex};
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
handlers.push_back(handler);
auto handler_it = std::prev(handlers.end());
if (handlers.size() == 1)
updateSubscriptionsToNestedStorages(lock);
return [this, type, handler_it]
{
std::unique_lock lock2{mutex};
auto & handlers2 = handlers_by_type[static_cast<size_t>(type)];
handlers2.erase(handler_it);
if (handlers2.empty())
updateSubscriptionsToNestedStorages(lock2);
};
}
bool MultipleAccessStorage::hasSubscriptionImpl(EntityType type) const
{
for (const auto & nested_storage : nested_storages)
std::lock_guard lock{mutex};
const auto & handlers = handlers_by_type[static_cast<size_t>(type)];
return !handlers.empty();
}
/// Updates subscriptions to nested storages.
/// We need the subscriptions to the nested storages if someone has subscribed to us.
/// If any of the nested storages is changed we call our subscribers.
void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock<std::mutex> & lock) const
{
/// lock is already locked.
std::vector<std::pair<StoragePtr, ext::scope_guard>> added_subscriptions[static_cast<size_t>(EntityType::MAX)];
std::vector<ext::scope_guard> removed_subscriptions;
for (auto type : ext::range(EntityType::MAX))
{
if (nested_storage->hasSubscription(type))
return true;
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
auto & subscriptions = subscriptions_to_nested_storages[static_cast<size_t>(type)];
if (handlers.empty())
{
/// None has subscribed to us, we need no subscriptions to the nested storages.
for (auto & subscription : subscriptions | boost::adaptors::map_values)
removed_subscriptions.push_back(std::move(subscription));
subscriptions.clear();
}
else
{
/// Someone has subscribed to us, now we need to have a subscription to each nested storage.
for (auto it = subscriptions.begin(); it != subscriptions.end();)
{
const auto & storage = it->first;
auto & subscription = it->second;
if (boost::range::find(*nested_storages, storage) == nested_storages->end())
{
removed_subscriptions.push_back(std::move(subscription));
it = subscriptions.erase(it);
}
else
++it;
}
for (const auto & storage : *nested_storages)
{
if (!subscriptions.count(storage))
added_subscriptions[static_cast<size_t>(type)].push_back({storage, nullptr});
}
}
}
return false;
/// Unlock the mutex temporarily because it's much better to subscribe to the nested storages
/// with the mutex unlocked.
lock.unlock();
removed_subscriptions.clear();
for (auto type : ext::range(EntityType::MAX))
{
if (!added_subscriptions[static_cast<size_t>(type)].empty())
{
auto on_changed = [this, type](const UUID & id, const AccessEntityPtr & entity)
{
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock2{mutex};
for (const auto & handler : handlers_by_type[static_cast<size_t>(type)])
notifications.push_back({handler, id, entity});
};
for (auto & [storage, subscription] : added_subscriptions[static_cast<size_t>(type)])
subscription = storage->subscribeForChanges(type, on_changed);
}
}
/// Lock the mutex again to store added subscriptions to the nested storages.
lock.lock();
for (auto type : ext::range(EntityType::MAX))
{
if (!added_subscriptions[static_cast<size_t>(type)].empty())
{
auto & subscriptions = subscriptions_to_nested_storages[static_cast<size_t>(type)];
for (auto & [storage, subscription] : added_subscriptions[static_cast<size_t>(type)])
{
if (!subscriptions.count(storage) && (boost::range::find(*nested_storages, storage) != nested_storages->end())
&& !handlers_by_type[static_cast<size_t>(type)].empty())
{
subscriptions.emplace(std::move(storage), std::move(subscription));
}
}
}
}
lock.unlock();
added_subscriptions->clear();
}
}

View File

@ -11,24 +11,27 @@ namespace DB
class MultipleAccessStorage : public IAccessStorage
{
public:
static constexpr char STORAGE_TYPE[] = "multiple";
using Storage = IAccessStorage;
using StoragePtr = std::shared_ptr<Storage>;
using ConstStoragePtr = std::shared_ptr<const Storage>;
MultipleAccessStorage(std::vector<std::unique_ptr<Storage>> nested_storages_);
MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE);
std::vector<UUID> findMultiple(EntityType type, const String & name) const;
const char * getStorageType() const override { return STORAGE_TYPE; }
template <typename EntityType>
std::vector<UUID> findMultiple(const String & name) const { return findMultiple(EntityType::TYPE, name); }
void setStorages(const std::vector<StoragePtr> & storages);
void addStorage(const StoragePtr & new_storage);
void removeStorage(const StoragePtr & storage_to_remove);
std::vector<StoragePtr> getStorages();
std::vector<ConstStoragePtr> getStorages() const;
std::shared_ptr<const std::vector<StoragePtr>> getStoragesPtr();
const Storage * findStorage(const UUID & id) const;
Storage * findStorage(const UUID & id);
const Storage & getStorage(const UUID & id) const;
Storage & getStorage(const UUID & id);
void addStorage(std::unique_ptr<Storage> nested_storage);
Storage & getStorageByIndex(size_t i) { return *(nested_storages[i]); }
const Storage & getStorageByIndex(size_t i) const { return *(nested_storages[i]); }
ConstStoragePtr findStorage(const UUID & id) const;
StoragePtr findStorage(const UUID & id);
ConstStoragePtr getStorage(const UUID & id) const;
StoragePtr getStorage(const UUID & id);
protected:
std::optional<UUID> findImpl(EntityType type, const String & name) const override;
@ -46,9 +49,15 @@ protected:
bool hasSubscriptionImpl(EntityType type) const override;
private:
std::vector<std::unique_ptr<Storage>> nested_storages;
mutable LRUCache<UUID, Storage *> ids_cache;
mutable std::mutex ids_cache_mutex;
using Storages = std::vector<StoragePtr>;
std::shared_ptr<const Storages> getStoragesInternal() const;
void updateSubscriptionsToNestedStorages(std::unique_lock<std::mutex> & lock) const;
std::shared_ptr<const Storages> nested_storages;
mutable LRUCache<UUID, Storage> ids_cache;
mutable std::list<OnChangedHandler> handlers_by_type[static_cast<size_t>(EntityType::MAX)];
mutable std::unordered_map<StoragePtr, ext::scope_guard> subscriptions_to_nested_storages[static_cast<size_t>(EntityType::MAX)];
mutable std::mutex mutex;
};
}

View File

@ -4,6 +4,7 @@
#include <Access/User.h>
#include <Access/SettingsProfile.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Core/Settings.h>
@ -13,6 +14,7 @@
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/adaptor/map.hpp>
#include <cstring>
#include <filesystem>
namespace DB
@ -467,19 +469,35 @@ namespace
}
UsersConfigAccessStorage::UsersConfigAccessStorage() : IAccessStorage("users.xml")
UsersConfigAccessStorage::UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_)
: UsersConfigAccessStorage(STORAGE_TYPE, check_setting_name_function_)
{
}
void UsersConfigAccessStorage::setCheckSettingNameFunction(
const std::function<void(const std::string_view &)> & check_setting_name_function_)
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const CheckSettingNameFunction & check_setting_name_function_)
: IAccessStorage(storage_name_), check_setting_name_function(check_setting_name_function_)
{
check_setting_name_function = check_setting_name_function_;
}
UsersConfigAccessStorage::~UsersConfigAccessStorage() = default;
String UsersConfigAccessStorage::getStoragePath() const
{
std::lock_guard lock{load_mutex};
return path;
}
void UsersConfigAccessStorage::setConfiguration(const Poco::Util::AbstractConfiguration & config)
void UsersConfigAccessStorage::setConfig(const Poco::Util::AbstractConfiguration & config)
{
std::lock_guard lock{load_mutex};
path.clear();
config_reloader.reset();
parseFromConfig(config);
}
void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config, getLogger()))
@ -493,6 +511,41 @@ void UsersConfigAccessStorage::setConfiguration(const Poco::Util::AbstractConfig
memory_storage.setAll(all_entities);
}
void UsersConfigAccessStorage::load(const String & users_config_path,
const String & include_from_path,
const String & preprocessed_dir,
const zkutil::GetZooKeeper & get_zookeeper_function)
{
std::lock_guard lock{load_mutex};
path = std::filesystem::canonical(users_config_path);
config_reloader.reset();
config_reloader = std::make_unique<ConfigReloader>(
users_config_path,
include_from_path,
preprocessed_dir,
zkutil::ZooKeeperNodeCache(get_zookeeper_function),
std::make_shared<Poco::Event>(),
[&](Poco::AutoPtr<Poco::Util::AbstractConfiguration> new_config)
{
parseFromConfig(*new_config);
Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path);
},
/* already_loaded = */ false);
}
void UsersConfigAccessStorage::reload()
{
std::lock_guard lock{load_mutex};
if (config_reloader)
config_reloader->reload();
}
void UsersConfigAccessStorage::startPeriodicReloading()
{
std::lock_guard lock{load_mutex};
if (config_reloader)
config_reloader->start();
}
std::optional<UUID> UsersConfigAccessStorage::findImpl(EntityType type, const String & name) const
{

View File

@ -1,29 +1,46 @@
#pragma once
#include <Access/MemoryAccessStorage.h>
#include <Common/ZooKeeper/Common.h>
namespace Poco
namespace Poco::Util
{
namespace Util
{
class AbstractConfiguration;
}
class AbstractConfiguration;
}
namespace DB
{
class ConfigReloader;
/// Implementation of IAccessStorage which loads all from users.xml periodically.
class UsersConfigAccessStorage : public IAccessStorage
{
public:
UsersConfigAccessStorage();
static constexpr char STORAGE_TYPE[] = "users.xml";
using CheckSettingNameFunction = std::function<void(const std::string_view &)>;
void setCheckSettingNameFunction(const std::function<void(const std::string_view &)> & check_setting_name_function_);
void setConfiguration(const Poco::Util::AbstractConfiguration & config);
UsersConfigAccessStorage(const String & storage_name_ = STORAGE_TYPE, const CheckSettingNameFunction & check_setting_name_function_ = {});
UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_);
~UsersConfigAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
String getStoragePath() const override;
bool isStorageReadOnly() const override { return true; }
void setConfig(const Poco::Util::AbstractConfiguration & config);
void load(const String & users_config_path,
const String & include_from_path = {},
const String & preprocessed_dir = {},
const zkutil::GetZooKeeper & get_zookeeper_function = {});
void reload();
void startPeriodicReloading();
private:
void parseFromConfig(const Poco::Util::AbstractConfiguration & config);
std::optional<UUID> findImpl(EntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(EntityType type) const override;
bool existsImpl(const UUID & id) const override;
@ -39,6 +56,10 @@ private:
bool hasSubscriptionImpl(EntityType type) const override;
MemoryAccessStorage memory_storage;
std::function<void(const std::string_view &)> check_setting_name_function;
CheckSettingNameFunction check_setting_name_function;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
mutable std::mutex load_mutex;
};
}

View File

@ -64,7 +64,10 @@ public:
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).value.merge(this->data(rhs).value);
auto & set = this->data(place).value;
if (set.capacity() != reserved)
set.resize(reserved);
set.merge(this->data(rhs).value);
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
@ -197,7 +200,10 @@ public:
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).value.merge(this->data(rhs).value);
auto & set = this->data(place).value;
if (set.capacity() != reserved)
set.resize(reserved);
set.merge(this->data(rhs).value);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override

View File

@ -500,6 +500,56 @@ Float NO_INLINE really_unrolled(const PODArray<UInt8> & keys, const PODArray<Flo
}
struct State4
{
Float sum[4] = {0, 0, 0, 0};
size_t count[4] = {0, 0, 0, 0};
template <UInt32 idx>
void add(Float value)
{
sum[idx] += value;
++count[idx];
}
Float result() const
{
return (sum[0] + sum[1] + sum[2] + sum[3]) / (count[0] + count[1] + count[2] + count[3]);
}
};
Float NO_INLINE another_unrolled_x4(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
State4 map[256]{};
size_t size = keys.size() & ~size_t(3);
for (size_t i = 0; i < size; i+=4)
{
map[keys[i]].add<0>(values[i]);
map[keys[i+1]].add<1>(values[i]);
map[keys[i+2]].add<2>(values[i]);
map[keys[i+3]].add<3>(values[i]);
}
/// tail
switch (keys.size() % 4)
{
case 3:
map[keys[size + 2]].add<2>(values[size + 2]);
[[fallthrough]];
case 2:
map[keys[size + 1]].add<1>(values[size + 1]);
[[fallthrough]];
case 1:
map[keys[size]].add<0>(values[size]);
[[fallthrough]];
default:
break;
}
return map[0].result();
}
int main(int argc, char ** argv)
{
size_t size = argc > 1 ? std::stoull(argv[1]) : 1000000000;
@ -533,6 +583,7 @@ int main(int argc, char ** argv)
case 9: res = baseline_baseline(keys, values); break;
case 10: res = buffered(keys, values); break;
case 11: res = really_unrolled<1>(keys, values); break;
case 12: res = another_unrolled_x4(keys, values); break;
case 32: res = unrolled<2>(keys, values); break;
case 34: res = unrolled<4>(keys, values); break;
@ -570,6 +621,8 @@ int main(int argc, char ** argv)
case 118: res = really_unrolled<8>(keys, values); break;
case 1116: res = really_unrolled<16>(keys, values); break;
case 124: res = another_unrolled_x4(keys, values); break;
default: break;
}

View File

@ -13,6 +13,7 @@ namespace ErrorCodes
{
extern const int THERE_IS_NO_PROFILE;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
@ -106,4 +107,27 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
field.getDescription())));
}
}
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))
return;
Settings settings;
for (auto setting : settings.all())
{
const auto & name = setting.getName();
if (config.has(name))
{
throw Exception(fmt::format("A setting '{}' appeared at top level in config {}."
" But it is user-level setting that should be located in users.xml inside <profiles> section for specific profile."
" You can add it to <profiles><default> if you want to change default value of this setting."
" You can also disable the check - specify <skip_check_for_incorrect_settings>1</skip_check_for_incorrect_settings>"
" in the main configuration file.",
name, config_path),
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
}
}
}

View File

@ -490,6 +490,10 @@ struct Settings : public BaseSettings<SettingsTraits>
/// Adds program options to set the settings from a command line.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptions(boost::program_options::options_description & options);
/// Check that there is no user-level settings at the top level in config.
/// This is a common source of mistake (user don't know where to write user-level setting).
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
};
}

View File

@ -17,12 +17,6 @@ SRCS(
ExternalTable.cpp
Field.cpp
iostream_debug_helpers.cpp
NamesAndTypes.cpp
PostgreSQLProtocol.cpp
Settings.cpp
SettingsEnums.cpp
SettingsFields.cpp
SortDescription.cpp
MySQL/Authentication.cpp
MySQL/IMySQLReadPacket.cpp
MySQL/IMySQLWritePacket.cpp
@ -34,6 +28,12 @@ SRCS(
MySQL/PacketsGeneric.cpp
MySQL/PacketsProtocolText.cpp
MySQL/PacketsReplication.cpp
NamesAndTypes.cpp
PostgreSQLProtocol.cpp
Settings.cpp
SettingsEnums.cpp
SettingsFields.cpp
SortDescription.cpp
)

View File

@ -34,6 +34,7 @@ target_link_libraries(clickhouse_functions
${FASTOPS_LIBRARY}
clickhouse_dictionaries
clickhouse_dictionaries_embedded
clickhouse_parsers
consistent-hashing
consistent-hashing-sumbur
dbms

View File

@ -0,0 +1,175 @@
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Parsers/Lexer.h>
#include <common/find_symbols.h>
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace
{
struct Impl
{
static void vector(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(data.size());
ColumnString::Offset prev_src_offset = 0;
for (size_t i = 0; i < size; ++i)
{
ColumnString::Offset curr_src_offset = offsets[i];
Lexer lexer(reinterpret_cast<const char *>(&data[prev_src_offset]), reinterpret_cast<const char *>(&data[curr_src_offset - 1]));
prev_src_offset = offsets[i];
/// Coalesce whitespace characters and comments to a single whitespace.
bool prev_insignificant = false;
/// Coalesce a list of comma separated literals to a single '?..' sequence.
size_t num_literals_in_sequence = 0;
bool prev_comma = false;
bool prev_whitespace = false;
while (true)
{
Token token = lexer.nextToken();
if (!token.isSignificant())
{
/// Replace a sequence of insignificant tokens with single whitespace.
if (!prev_insignificant)
{
if (0 == num_literals_in_sequence)
res_data.push_back(' ');
else
prev_whitespace = true;
}
prev_insignificant = true;
continue;
}
prev_insignificant = false;
/// Literals.
if (token.type == TokenType::Number || token.type == TokenType::StringLiteral)
{
if (0 == num_literals_in_sequence)
res_data.push_back('?');
++num_literals_in_sequence;
prev_whitespace = false;
prev_comma = false;
continue;
}
else if (token.type == TokenType::Comma)
{
if (num_literals_in_sequence)
{
prev_comma = true;
continue;
}
}
else
{
if (num_literals_in_sequence > 1)
{
res_data.push_back('.');
res_data.push_back('.');
}
if (prev_comma)
res_data.push_back(',');
if (prev_whitespace)
res_data.push_back(' ');
num_literals_in_sequence = 0;
prev_comma = false;
prev_whitespace = false;
}
/// Slightly normalize something that look like aliases - if they are complex, replace them to `?` placeholders.
if (token.type == TokenType::QuotedIdentifier
/// Differentiate identifier from function (example: SHA224(x)).
/// By the way, there is padding in columns and pointer dereference is Ok.
|| (token.type == TokenType::BareWord && *token.end != '('))
{
/// Identifier is complex if it contains whitespace or more than two digits
/// or it's at least 36 bytes long (UUID for example).
size_t num_digits = 0;
const char * pos = token.begin;
if (token.size() < 36)
{
for (; pos != token.end; ++pos)
{
if (isWhitespaceASCII(*pos))
break;
if (isNumericASCII(*pos))
{
++num_digits;
if (num_digits > 2)
break;
}
}
}
if (pos == token.end)
{
res_data.insert(token.begin, token.end);
}
else
{
res_data.push_back('`');
res_data.push_back('?');
res_data.push_back('`');
}
continue;
}
if (token.isEnd() || token.isError())
break;
res_data.insert(token.begin, token.end);
}
res_data.push_back(0);
res_offsets[i] = res_data.size();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception("Cannot apply function normalizeQuery to fixed string.", ErrorCodes::ILLEGAL_COLUMN);
}
};
struct Name
{
static constexpr auto name = "normalizeQuery";
};
}
void registerFunctionNormalizeQuery(FunctionFactory & factory)
{
factory.registerFunction<FunctionStringToString<Impl, Name>>();
}
}

View File

@ -0,0 +1,187 @@
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/FunctionFactory.h>
#include <Parsers/Lexer.h>
#include <common/find_symbols.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/SipHash.h>
/** The function returns 64bit hash value that is identical for similar queries.
* See also 'normalizeQuery'. This function is only slightly more efficient.
*/
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
struct Impl
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
PaddedPODArray<UInt64> & res_data)
{
size_t size = offsets.size();
res_data.resize(size);
ColumnString::Offset prev_src_offset = 0;
for (size_t i = 0; i < size; ++i)
{
SipHash hash;
ColumnString::Offset curr_src_offset = offsets[i];
Lexer lexer(reinterpret_cast<const char *>(&data[prev_src_offset]), reinterpret_cast<const char *>(&data[curr_src_offset - 1]));
prev_src_offset = offsets[i];
/// Coalesce a list of comma separated literals.
size_t num_literals_in_sequence = 0;
bool prev_comma = false;
while (true)
{
Token token = lexer.nextToken();
if (!token.isSignificant())
continue;
/// Literals.
if (token.type == TokenType::Number || token.type == TokenType::StringLiteral)
{
if (0 == num_literals_in_sequence)
hash.update("\x00", 1);
++num_literals_in_sequence;
prev_comma = false;
continue;
}
else if (token.type == TokenType::Comma)
{
if (num_literals_in_sequence)
{
prev_comma = true;
continue;
}
}
else
{
if (num_literals_in_sequence > 1)
hash.update("\x00", 1);
if (prev_comma)
hash.update(",", 1);
num_literals_in_sequence = 0;
prev_comma = false;
}
/// Slightly normalize something that look like aliases - if they are complex, replace them to `?` placeholders.
if (token.type == TokenType::QuotedIdentifier
/// Differentiate identifier from function (example: SHA224(x)).
/// By the way, there is padding in columns and pointer dereference is Ok.
|| (token.type == TokenType::BareWord && *token.end != '('))
{
/// Identifier is complex if it contains whitespace or more than two digits
/// or it's at least 36 bytes long (UUID for example).
size_t num_digits = 0;
const char * pos = token.begin;
if (token.size() < 36)
{
for (; pos != token.end; ++pos)
{
if (isWhitespaceASCII(*pos))
break;
if (isNumericASCII(*pos))
{
++num_digits;
if (num_digits > 2)
break;
}
}
}
if (pos == token.end)
hash.update(token.begin, token.size());
else
hash.update("\x01", 1);
continue;
}
if (token.isEnd() || token.isError())
break;
hash.update(token.begin, token.size());
}
res_data[i] = hash.get64();
}
}
};
class FunctionNormalizedQueryHash : public IFunction
{
public:
static constexpr auto name = "normalizedQueryHash";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionNormalizedQueryHash>();
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 1;
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeUInt64>();
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) const override
{
const ColumnPtr column = block.getByPosition(arguments[0]).column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnUInt64::create();
typename ColumnUInt64::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
Impl::vector(col->getChars(), col->getOffsets(), vec_res);
block.getByPosition(result).column = std::move(col_res);
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
}
void registerFunctionNormalizedQueryHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionNormalizedQueryHash>();
}
}

View File

@ -30,6 +30,8 @@ void registerFunctionStartsWith(FunctionFactory &);
void registerFunctionEndsWith(FunctionFactory &);
void registerFunctionTrim(FunctionFactory &);
void registerFunctionRegexpQuoteMeta(FunctionFactory &);
void registerFunctionNormalizeQuery(FunctionFactory &);
void registerFunctionNormalizedQueryHash(FunctionFactory &);
#if USE_BASE64
void registerFunctionBase64Encode(FunctionFactory &);
@ -62,6 +64,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionEndsWith(factory);
registerFunctionTrim(factory);
registerFunctionRegexpQuoteMeta(factory);
registerFunctionNormalizeQuery(factory);
registerFunctionNormalizedQueryHash(factory);
#if USE_BASE64
registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory);

View File

@ -18,6 +18,7 @@ ADDINCL(
PEERDIR(
clickhouse/src/Common
clickhouse/src/Parsers
clickhouse/src/Dictionaries
contrib/libs/farmhash
contrib/libs/fastops/fastops
@ -293,6 +294,8 @@ SRCS(
multiSearchFirstPositionUTF8.cpp
negate.cpp
neighbor.cpp
normalizedQueryHash.cpp
normalizeQuery.cpp
notEmpty.cpp
notEquals.cpp
notILike.cpp

View File

@ -17,6 +17,7 @@ ADDINCL(
PEERDIR(
clickhouse/src/Common
clickhouse/src/Parsers
clickhouse/src/Dictionaries
contrib/libs/farmhash
contrib/libs/fastops/fastops

View File

@ -133,7 +133,7 @@ bool PipelineExecutor::expandPipeline(Stack & stack, UInt64 pid)
for (uint64_t node = 0; node < graph->nodes.size(); ++node)
{
direct_edge_sizes[node] = graph->nodes[node]->direct_edges.size();
back_edges_sizes[node] = graph->nodes[node]->direct_edges.size();
back_edges_sizes[node] = graph->nodes[node]->back_edges.size();
}
auto updated_nodes = graph->expandPipeline(processors);

View File

@ -79,6 +79,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
extern const int ILLEGAL_COLUMN;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int TOO_MANY_ROWS;
}
@ -573,6 +574,8 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
/// Let's find what range to read from each part.
{
std::atomic<size_t> total_rows {0};
auto process_part = [&](size_t part_index)
{
auto & part = parts[part_index];
@ -599,7 +602,23 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings, log);
if (!ranges.ranges.empty())
{
if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
{
/// Fail fast if estimated number of rows to read exceeds the limit
auto current_rows_estimate = ranges.getRowsCount();
size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate);
size_t total_rows_estimate = current_rows_estimate + prev_total_rows_estimate;
if (total_rows_estimate > settings.max_rows_to_read)
throw Exception(
"Limit for rows (controlled by 'max_rows_to_read' setting) exceeded, max rows: "
+ formatReadableQuantity(settings.max_rows_to_read)
+ ", estimated rows to read (at least): " + formatReadableQuantity(total_rows_estimate),
ErrorCodes::TOO_MANY_ROWS);
}
parts_with_ranges[part_index] = std::move(ranges);
}
};
size_t num_threads = std::min(size_t(num_streams), parts.size());

View File

@ -112,9 +112,6 @@ void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, const Cont
auto quota = access_control.tryRead<Quota>(id);
if (!quota)
continue;
const auto * storage = access_control.findStorage(id);
if (!storage)
continue;
add_rows(quota->getName(), quota->all_limits);
}

View File

@ -114,7 +114,7 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, const Context &
auto quota = access_control.tryRead<Quota>(id);
if (!quota)
continue;
const auto * storage = access_control.findStorage(id);
auto storage = access_control.findStorage(id);
if (!storage)
continue;

View File

@ -49,7 +49,7 @@ void StorageSystemRoles::fillData(MutableColumns & res_columns, const Context &
if (!role)
continue;
const auto * storage = access_control.findStorage(id);
auto storage = access_control.findStorage(id);
if (!storage)
continue;

View File

@ -130,7 +130,7 @@ void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, const Cont
auto policy = access_control.tryRead<RowPolicy>(id);
if (!policy)
continue;
const auto * storage = access_control.findStorage(id);
auto storage = access_control.findStorage(id);
if (!storage)
continue;

View File

@ -76,7 +76,7 @@ void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, const
if (!profile)
continue;
const auto * storage = access_control.findStorage(id);
auto storage = access_control.findStorage(id);
if (!storage)
continue;

View File

@ -0,0 +1,56 @@
#include <Storages/System/StorageSystemUserDirectories.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/Context.h>
#include <Access/AccessControlManager.h>
#include <ext/enumerate.h>
namespace DB
{
NamesAndTypesList StorageSystemUserDirectories::getNamesAndTypes()
{
NamesAndTypesList names_and_types{
{"name", std::make_shared<DataTypeString>()},
{"type", std::make_shared<DataTypeString>()},
{"path", std::make_shared<DataTypeString>()},
{"readonly", std::make_shared<DataTypeUInt8>()},
{"precedence", std::make_shared<DataTypeUInt64>()},
};
return names_and_types;
}
void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const
{
const auto & access_control = context.getAccessControlManager();
auto storages = access_control.getStorages();
size_t column_index = 0;
auto & column_name = assert_cast<ColumnString &>(*res_columns[column_index++]);
auto & column_type = assert_cast<ColumnString &>(*res_columns[column_index++]);
auto & column_path = assert_cast<ColumnString &>(*res_columns[column_index++]);
auto & column_readonly = assert_cast<ColumnUInt8 &>(*res_columns[column_index++]);
auto & column_precedence = assert_cast<ColumnUInt64 &>(*res_columns[column_index++]);
auto add_row = [&](const IAccessStorage & storage, size_t precedence)
{
const String & name = storage.getStorageName();
std::string_view type = storage.getStorageType();
const String & path = storage.getStoragePath();
bool readonly = storage.isStorageReadOnly();
column_name.insertData(name.data(), name.length());
column_type.insertData(type.data(), type.length());
column_path.insertData(path.data(), path.length());
column_readonly.insert(readonly);
column_precedence.insert(precedence);
};
for (auto [i, storage] : ext::enumerate(storages))
add_row(*storage, i + 1);
}
}

View File

@ -0,0 +1,24 @@
#pragma once
#include <ext/shared_ptr_helper.h>
#include <Storages/System/IStorageSystemOneBlock.h>
namespace DB
{
class Context;
/// Implements `users_directories` system table, which allows you to get information about user directories.
class StorageSystemUserDirectories final : public ext::shared_ptr_helper<StorageSystemUserDirectories>, public IStorageSystemOneBlock<StorageSystemUserDirectories>
{
public:
std::string getName() const override { return "SystemUserDirectories"; }
static NamesAndTypesList getNamesAndTypes();
protected:
friend struct ext::shared_ptr_helper<StorageSystemUserDirectories>;
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const override;
};
}

View File

@ -165,7 +165,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context &
if (!user)
continue;
const auto * storage = access_control.findStorage(id);
auto storage = access_control.findStorage(id);
if (!storage)
continue;

View File

@ -57,6 +57,7 @@
#include <Storages/System/StorageSystemQuotaLimits.h>
#include <Storages/System/StorageSystemQuotaUsage.h>
#include <Storages/System/StorageSystemQuotasUsage.h>
#include <Storages/System/StorageSystemUserDirectories.h>
#include <Storages/System/StorageSystemPrivileges.h>
#ifdef OS_LINUX
@ -102,6 +103,7 @@ void attachSystemTablesLocal(IDatabase & system_database)
attach<StorageSystemQuotaLimits>(system_database, "quota_limits");
attach<StorageSystemQuotaUsage>(system_database, "quota_usage");
attach<StorageSystemQuotasUsage>(system_database, "quotas_usage");
attach<StorageSystemUserDirectories>(system_database, "user_directories");
attach<StorageSystemPrivileges>(system_database, "privileges");
#if !defined(ARCADIA_BUILD)

View File

@ -182,6 +182,7 @@ SRCS(
System/StorageSystemTableEngines.cpp
System/StorageSystemTableFunctions.cpp
System/StorageSystemTables.cpp
System/StorageSystemUserDirectories.cpp
System/StorageSystemUsers.cpp
System/StorageSystemZeros.cpp
System/StorageSystemZooKeeper.cpp

View File

@ -200,8 +200,8 @@ def test_introspection():
assert expected_access2 in instance.query("SHOW ACCESS")
assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\
TSV([[ "A", "disk", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ],
[ "B", "disk", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]])
TSV([[ "A", "local directory", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ],
[ "B", "local directory", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]])
assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') ORDER BY user_name, access_type, grant_option") ==\
TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ],

View File

@ -99,12 +99,13 @@ def test_materialize_database_ddl_with_mysql_5_7(started_cluster, started_mysql_
materialize_with_ddl.drop_table_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.create_table_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.rename_table_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.alter_add_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.alter_drop_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
#FIXME
#materialize_with_ddl.alter_add_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
#materialize_with_ddl.alter_drop_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
# mysql 5.7 cannot support alter rename column
# materialize_with_ddl.alter_rename_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.alter_rename_table_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
materialize_with_ddl.alter_modify_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
#materialize_with_ddl.alter_rename_table_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
#materialize_with_ddl.alter_modify_column_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql5_7")
def test_materialize_database_ddl_with_mysql_8_0(started_cluster, started_mysql_8_0):

View File

@ -177,8 +177,8 @@ def test_introspection():
assert expected_access3 in instance.query("SHOW ACCESS")
assert instance.query("SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name") ==\
TSV([[ "R1", "disk" ],
[ "R2", "disk" ]])
TSV([[ "R1", "local directory" ],
[ "R2", "local directory" ]])
assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, grant_option") ==\
TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ],

View File

@ -50,7 +50,7 @@ def test_smoke():
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n"
assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin")
assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 1, 0, "['robin']", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 1, 0, "['robin']", "[]" ]]
assert system_settings_profile_elements(profile_name="xyz") == [[ "xyz", "\N", "\N", 0, "max_memory_usage", 100000001, 90000000, 110000000, "\N", "\N" ]]
instance.query("ALTER SETTINGS PROFILE xyz TO NONE")
@ -58,7 +58,7 @@ def test_smoke():
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n"
instance.query("SET max_memory_usage = 80000000", user="robin")
instance.query("SET max_memory_usage = 120000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 1, 0, "[]", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 1, 0, "[]", "[]" ]]
assert system_settings_profile_elements(user_name="robin") == []
# Set settings and constraints via CREATE USER ... SETTINGS PROFILE
@ -87,7 +87,7 @@ def test_settings_from_granted_role():
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n"
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_ast_depth'", user="robin") == "2000\n"
assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 2, 0, "[]", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 2, 0, "[]", "[]" ]]
assert system_settings_profile_elements(profile_name="xyz") == [[ "xyz", "\N", "\N", 0, "max_memory_usage", 100000001, "\N", 110000000, "\N", "\N" ],
[ "xyz", "\N", "\N", 1, "max_ast_depth", 2000, "\N", "\N", "\N", "\N" ]]
assert system_settings_profile_elements(role_name="worker") == [[ "\N", "\N", "worker", 0, "\N", "\N", "\N", "\N", "\N", "xyz" ]]
@ -108,13 +108,13 @@ def test_settings_from_granted_role():
assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MAX 110000000, max_ast_depth = 2000 TO worker\n"
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n"
assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 2, 0, "['worker']", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 2, 0, "['worker']", "[]" ]]
instance.query("ALTER SETTINGS PROFILE xyz TO NONE")
assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MAX 110000000, max_ast_depth = 2000\n"
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n"
instance.query("SET max_memory_usage = 120000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 2, 0, "[]", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 2, 0, "[]", "[]" ]]
def test_inheritance():
@ -125,9 +125,9 @@ def test_inheritance():
assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000002\n"
assert "Setting max_memory_usage should not be changed" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin")
assert system_settings_profile("xyz") == [[ "xyz", "disk", 1, 0, "[]", "[]" ]]
assert system_settings_profile("xyz") == [[ "xyz", "local directory", 1, 0, "[]", "[]" ]]
assert system_settings_profile_elements(profile_name="xyz") == [[ "xyz", "\N", "\N", 0, "max_memory_usage", 100000002, "\N", "\N", 1, "\N" ]]
assert system_settings_profile("alpha") == [[ "alpha", "disk", 1, 0, "['robin']", "[]" ]]
assert system_settings_profile("alpha") == [[ "alpha", "local directory", 1, 0, "['robin']", "[]" ]]
assert system_settings_profile_elements(profile_name="alpha") == [[ "alpha", "\N", "\N", 0, "\N", "\N", "\N", "\N", "\N", "xyz" ]]
assert system_settings_profile_elements(user_name="robin") == []

View File

@ -0,0 +1,15 @@
<yandex>
<user_directories replace="replace">
<users_xml>
<path>/etc/clickhouse-server/users3.xml</path>
</users_xml>
<local_directory>
<path>/var/lib/clickhouse/access3/</path>
</local_directory>
<local_directory>
<name>local directory (ro)</name>
<readonly>true</readonly>
<path>/var/lib/clickhouse/access3-ro/</path>
</local_directory>
</user_directories>
</yandex>

View File

@ -0,0 +1,8 @@
<yandex>
<user_directories replace="replace">
<users_xml>
<path>/etc/clickhouse-server/users5.xml</path>
</users_xml>
<memory/>
</user_directories>
</yandex>

View File

@ -0,0 +1,5 @@
<yandex>
<users_config>/etc/clickhouse-server/users2.xml</users_config>
<access_control_path>/var/lib/clickhouse/access2/</access_control_path>
<user_directories remove="remove"/>
</yandex>

View File

@ -0,0 +1,7 @@
<yandex>
<user_directories replace="replace">
<users_xml>
<path>users4.xml</path>
</users_xml>
</user_directories>
</yandex>

View File

@ -0,0 +1,51 @@
import pytest
import os
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True)
@pytest.fixture(scope="module", autouse=True)
def started_cluster():
try:
cluster.start()
node.exec_in_container("cp /etc/clickhouse-server/users.xml /etc/clickhouse-server/users2.xml")
node.exec_in_container("cp /etc/clickhouse-server/users.xml /etc/clickhouse-server/users3.xml")
node.exec_in_container("cp /etc/clickhouse-server/users.xml /etc/clickhouse-server/users4.xml")
node.exec_in_container("cp /etc/clickhouse-server/users.xml /etc/clickhouse-server/users5.xml")
yield cluster
finally:
cluster.shutdown()
def test_old_style():
node.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/old_style.xml"), '/etc/clickhouse-server/config.d/z.xml')
node.restart_clickhouse()
assert node.query("SELECT * FROM system.user_directories") == TSV([["users.xml", "users.xml", "/etc/clickhouse-server/users2.xml", 1, 1],
["local directory", "local directory", "/var/lib/clickhouse/access2/", 0, 2]])
def test_local_directories():
node.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/local_directories.xml"), '/etc/clickhouse-server/config.d/z.xml')
node.restart_clickhouse()
assert node.query("SELECT * FROM system.user_directories") == TSV([["users.xml", "users.xml", "/etc/clickhouse-server/users3.xml", 1, 1],
["local directory", "local directory", "/var/lib/clickhouse/access3/", 0, 2],
["local directory (ro)", "local directory", "/var/lib/clickhouse/access3-ro/", 1, 3]])
def test_relative_path():
node.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/relative_path.xml"), '/etc/clickhouse-server/config.d/z.xml')
node.restart_clickhouse()
assert node.query("SELECT * FROM system.user_directories") == TSV([["users.xml", "users.xml", "/etc/clickhouse-server/users4.xml", 1, 1]])
def test_memory():
node.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/memory.xml"), '/etc/clickhouse-server/config.d/z.xml')
node.restart_clickhouse()
assert node.query("SELECT * FROM system.user_directories") == TSV([["users.xml", "users.xml", "/etc/clickhouse-server/users5.xml", 1, 1],
["memory", "memory", "", 0, 2]])

View File

@ -12,6 +12,7 @@ DROP TABLE IF EXISTS mv_checkouts2target;
-- that is the final table, which is filled incrementally from 2 different sources
CREATE TABLE target_table Engine=SummingMergeTree() ORDER BY id
SETTINGS index_granularity=128
AS
SELECT
number as id,
@ -85,12 +86,20 @@ INSERT INTO logins SELECT number as id, '2000-01-01 08:00:00' from numbers(50
INSERT INTO checkouts SELECT number as id, '2000-01-01 10:00:00' from numbers(50000);
-- ensure that we don't read whole target table during join
set max_rows_to_read = 2000;
-- by this time we should have 3 parts for target_table because of prev inserts
-- and we plan to make two more inserts. With index_granularity=128 and max id=1000
-- we expect to read not more than:
-- (1000/128) marks per part * (3 + 2) parts * 128 granularity = 5120 rows
set max_rows_to_read = 5120;
INSERT INTO logins SELECT number as id, '2000-01-01 11:00:00' from numbers(1000);
INSERT INTO checkouts SELECT number as id, '2000-01-01 11:10:00' from numbers(1000);
set max_rows_to_read = 10;
-- by this time we should have 5 parts for target_table because of prev inserts
-- and we plan to make two more inserts. With index_granularity=128 and max id=1
-- we expect to read not more than:
-- 1 mark per part * (5 + 2) parts * 128 granularity = 896 rows
set max_rows_to_read = 896;
INSERT INTO logins SELECT number+2 as id, '2001-01-01 11:10:01' from numbers(1);
INSERT INTO checkouts SELECT number+2 as id, '2001-01-01 11:10:02' from numbers(1);

View File

@ -104,10 +104,10 @@ CREATE USER u2_01292 DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1
CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292
CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292
-- system.users
u1_01292 disk plaintext_password {} [] ['localhost'] [] [] 1 [] []
u2_01292 disk no_password {} [] [] [] ['%.%.myhost.com'] 0 [] []
u3_01292 disk sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] []
u4_01292 disk double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292']
u1_01292 local directory plaintext_password {} [] ['localhost'] [] [] 1 [] []
u2_01292 local directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] []
u3_01292 local directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] []
u4_01292 local directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292']
-- system.settings_profile_elements
\N u1_01292 \N 0 readonly 1 \N \N \N \N
\N u2_01292 \N 0 \N \N \N \N \N default

View File

@ -28,7 +28,7 @@ CREATE ROLE r2_01293
CREATE ROLE r1_01293 SETTINGS readonly = 1
CREATE ROLE r2_01293 SETTINGS readonly = 1
-- system.roles
r1_01293 disk
r1_01293 local directory
-- system.settings_profile_elements
\N \N r1_01293 0 readonly 1 \N \N \N \N
\N \N r2_01293 0 \N \N \N \N \N default

View File

@ -42,11 +42,11 @@ CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 6000000 TO r1_01294
CREATE SETTINGS PROFILE s3_01294 SETTINGS max_memory_usage = 6000000 TO r1_01294
CREATE SETTINGS PROFILE s4_01294 TO r1_01294
-- system.settings_profiles
s1_01294 disk 0 0 [] []
s2_01294 disk 1 0 ['r1_01294'] []
s3_01294 disk 1 0 ['r1_01294'] []
s4_01294 disk 1 0 ['r1_01294'] []
s5_01294 disk 3 1 [] ['r1_01294']
s1_01294 local directory 0 0 [] []
s2_01294 local directory 1 0 ['r1_01294'] []
s3_01294 local directory 1 0 ['r1_01294'] []
s4_01294 local directory 1 0 ['r1_01294'] []
s5_01294 local directory 3 1 [] ['r1_01294']
-- system.settings_profile_elements
s2_01294 \N \N 0 readonly 0 \N \N \N \N
s3_01294 \N \N 0 max_memory_usage 5000000 4000000 6000000 1 \N

View File

@ -30,6 +30,6 @@ CREATE ROW POLICY p5_01295 ON db2.table2 FOR SELECT USING a = b
CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 TO ALL
CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 TO ALL
-- system.row_policies
p1_01295 ON db.table p1_01295 db table disk (a < b) AND (c > d) 0 0 [] []
p2_01295 ON db.table p2_01295 db table disk id = currentUser() 1 0 ['u1_01295'] []
p3_01295 ON db.table p3_01295 db table disk 1 0 1 [] ['r1_01295']
p1_01295 ON db.table p1_01295 db table local directory (a < b) AND (c > d) 0 0 [] []
p2_01295 ON db.table p2_01295 db table local directory id = currentUser() 1 0 ['u1_01295'] []
p3_01295 ON db.table p3_01295 db table local directory 1 0 1 [] ['r1_01295']

View File

@ -52,10 +52,10 @@ CREATE QUOTA q2_01297 FOR INTERVAL 1 day MAX errors = 5
CREATE QUOTA q1_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297
CREATE QUOTA q2_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297
-- system.quotas
q1_01297 disk ['user_name'] [] 0 ['r1_01297'] []
q2_01297 disk [] [5259492] 0 ['r1_01297','u1_01297'] []
q3_01297 disk ['client_key','user_name'] [5259492,15778476] 0 [] []
q4_01297 disk [] [604800] 1 [] ['u1_01297']
q1_01297 local directory ['user_name'] [] 0 ['r1_01297'] []
q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] []
q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] []
q4_01297 local directory [] [604800] 1 [] ['u1_01297']
-- system.quota_limits
q2_01297 5259492 0 100 11 1000 10000 1001 10001 2.5
q3_01297 5259492 0 \N \N 1002 \N \N \N \N

View File

@ -1,4 +1,5 @@
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, number) AS state FROM system.numbers LIMIT 1000);
SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', 1, number) AS state FROM system.numbers LIMIT 1000);
SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, 1) AS state FROM system.numbers LIMIT 1000);
-- order in libstdc++ and libc++ may be a little different, that's why we need arraySort()
SELECT arraySort(topKWeightedMerge(10)(state)) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, 1) AS state FROM system.numbers LIMIT 1000);

View File

@ -0,0 +1,6 @@
AggregateFunctionTopK
20
20
AggregateFunctionTopKGenericData
20
20

View File

@ -0,0 +1,13 @@
drop table if exists data_01409;
create table data_01409 engine=Memory as select * from numbers(20);
-- easier to check merging via distributed tables
-- but can be done vai topKMerge(topKState()) as well
select 'AggregateFunctionTopK';
select length(topK(20)(number)) from remote('127.{1,1}', currentDatabase(), data_01409);
select length(topKWeighted(20)(number, 1)) from remote('127.{1,1}', currentDatabase(), data_01409);
select 'AggregateFunctionTopKGenericData';
select length(topK(20)((number, ''))) from remote('127.{1,1}', currentDatabase(), data_01409);
select length(topKWeighted(20)((number, ''), 1)) from remote('127.{1,1}', currentDatabase(), data_01409);

View File

@ -0,0 +1,22 @@
SELECT ?
SELECT ?
SELECT ?..
SELECT ?..,
SELECT ?..,
SELECT ?..
SELECT ?.. WHERE ?
SELECT ?.. WHERE ? = ?
SELECT ?.. WHERE ? = ? AND (x, y)
SELECT ?.. WHERE ? = ? AND (?, y)
[?..]
[?.., x]
SELECT ?.. WHERE ? = ? AND (?, y) LIMIT ?..
SELECT ? AS `xyz`
SELECT ? AS `xyz1`
SELECT ? AS `xyz11`
SELECT ? AS `?`
SELECT ? AS xyz1
SELECT ? AS xyz11
SELECT ? xyz11
SELECT ?, xyz11
SELECT ?..

View File

@ -0,0 +1,22 @@
SELECT normalizeQuery('SELECT 1');
SELECT normalizeQuery('SELECT 1');
SELECT normalizeQuery('SELECT 1, 1, 1');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hwllo */');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\'');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\' WHERE 1');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\' WHERE 1 = 1');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\' WHERE 1 = 1 AND (x, y)');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\' WHERE 1 = 1 AND (1, y)');
SELECT normalizeQuery('[1, 2, 3]');
SELECT normalizeQuery('[1, 2, 3, x]');
SELECT normalizeQuery('SELECT 1, 1, 1, /* Hello */ \'abc\' WHERE 1 = 1 AND (1, y) LIMIT 1, 1');
SELECT normalizeQuery('SELECT 1 AS `xyz`');
SELECT normalizeQuery('SELECT 1 AS `xyz1`');
SELECT normalizeQuery('SELECT 1 AS `xyz11`');
SELECT normalizeQuery('SELECT 1 AS xyz111');
SELECT normalizeQuery('SELECT 1 AS xyz1');
SELECT normalizeQuery('SELECT 1 AS xyz11');
SELECT normalizeQuery('SELECT 1 xyz11');
SELECT normalizeQuery('SELECT 1, xyz11');
SELECT normalizeQuery('SELECT 1, ''xyz11''');

View File

@ -0,0 +1,7 @@
1
1
1
1
1
1
1

View File

@ -0,0 +1,7 @@
SELECT normalizedQueryHash('SELECT 1') = normalizedQueryHash('SELECT 2');
SELECT normalizedQueryHash('SELECT 1') != normalizedQueryHash('SELECT 1, 1, 1');
SELECT normalizedQueryHash('SELECT 1, 1, 1, /* Hello */ \'abc\'') = normalizedQueryHash('SELECT 2, 3');
SELECT normalizedQueryHash('[1, 2, 3]') = normalizedQueryHash('[1, ''x'']');
SELECT normalizedQueryHash('[1, 2, 3, x]') != normalizedQueryHash('[1, x]');
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`');
SELECT normalizedQueryHash('SELECT 1 AS xyz111') = normalizedQueryHash('SELECT 2 AS xyz234');

View File

@ -0,0 +1 @@
SELECT ? AS `?` 1

Some files were not shown because too many files have changed in this diff Show More