Merge branch 'master' into fix-data-size

Antonio Andelic 2023-03-03 15:33:43 +01:00 committed by GitHub
commit cd4100cb19
404 changed files with 4895 additions and 1896 deletions

View File

@ -209,3 +209,5 @@ CheckOptions:
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true

View File

@ -391,10 +391,12 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it

View File

@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un
#include <sys/stat.h>
#include <stdint.h>
#if !defined(__aarch64__)
struct statx {
uint32_t stx_mask;
uint32_t stx_blksize;
@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag,
{
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
}
#endif
#include <syscall.h>

View File

@ -8,3 +8,8 @@ int fallocate(int fd, int mode, off_t base, off_t len)
{
return syscall(SYS_fallocate, fd, mode, base, len);
}
int fallocate64(int fd, int mode, off_t base, off_t len)
{
return fallocate(fd, mode, base, len);
}

View File

@ -9,3 +9,8 @@ ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
/// There was cancellable syscall (syscall_cp), but I don't care.
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
}
ssize_t pwritev64(int fd, const struct iovec *iov, int count, off_t ofs)
{
return pwritev(fd, iov, count, ofs);
}

View File

@ -67,19 +67,7 @@ public:
void swap(Timespan & timespan);
/// Swaps the Timespan with another one.
bool operator==(const Timespan & ts) const;
bool operator!=(const Timespan & ts) const;
bool operator>(const Timespan & ts) const;
bool operator>=(const Timespan & ts) const;
bool operator<(const Timespan & ts) const;
bool operator<=(const Timespan & ts) const;
bool operator==(TimeDiff microSeconds) const;
bool operator!=(TimeDiff microSeconds) const;
bool operator>(TimeDiff microSeconds) const;
bool operator>=(TimeDiff microSeconds) const;
bool operator<(TimeDiff microSeconds) const;
bool operator<=(TimeDiff microSeconds) const;
auto operator<=>(const Timespan & ts) const = default;
Timespan operator+(const Timespan & d) const;
Timespan operator-(const Timespan & d) const;
@ -215,78 +203,6 @@ inline Timespan::TimeDiff Timespan::totalMicroseconds() const
}
inline bool Timespan::operator==(const Timespan & ts) const
{
return _span == ts._span;
}
inline bool Timespan::operator!=(const Timespan & ts) const
{
return _span != ts._span;
}
inline bool Timespan::operator>(const Timespan & ts) const
{
return _span > ts._span;
}
inline bool Timespan::operator>=(const Timespan & ts) const
{
return _span >= ts._span;
}
inline bool Timespan::operator<(const Timespan & ts) const
{
return _span < ts._span;
}
inline bool Timespan::operator<=(const Timespan & ts) const
{
return _span <= ts._span;
}
inline bool Timespan::operator==(TimeDiff microSeconds) const
{
return _span == microSeconds;
}
inline bool Timespan::operator!=(TimeDiff microSeconds) const
{
return _span != microSeconds;
}
inline bool Timespan::operator>(TimeDiff microSeconds) const
{
return _span > microSeconds;
}
inline bool Timespan::operator>=(TimeDiff microSeconds) const
{
return _span >= microSeconds;
}
inline bool Timespan::operator<(TimeDiff microSeconds) const
{
return _span < microSeconds;
}
inline bool Timespan::operator<=(TimeDiff microSeconds) const
{
return _span <= microSeconds;
}
inline void swap(Timespan & s1, Timespan & s2)
{
s1.swap(s2);

View File

@ -30,7 +30,7 @@ elseif (ARCH_AARCH64)
# support it.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
else ()
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1, 10]. In particular, it
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
# old OSs.
@ -45,19 +45,20 @@ elseif (ARCH_AARCH64)
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits
# but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
# ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15.
# rcpc: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
# [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc")
endif ()
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM

View File

@ -45,6 +45,7 @@ if (COMPILER_CLANG)
no_warning(weak-vtables)
no_warning(thread-safety-negative) # experimental flag, too many false positives
no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
no_warning(unsafe-buffer-usage) # too aggressive
# TODO Enable conversion, sign-conversion, double-promotion warnings.
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler

contrib/capnproto vendored

@ -1 +1 @@
Subproject commit e19cd661e49dd9022d3f920b69d843333b896451
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441

View File

@ -98,6 +98,16 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# Since we always use toolchain files to generate hermetic builds, cmake will
# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM
# targets with all tests enabled, which will slow down cmake configuration and
# compilation (You'll see Building native llvm-tblgen...). Let's disable the
# cross compiling indicator for now.
#
# TODO We should let cmake know whether it's indeed a cross compilation in the
# first place.
set (CMAKE_CROSSCOMPILING 0)
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES

View File

@ -159,9 +159,10 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

View File

@ -60,12 +60,21 @@ fi
clickhouse_download_filename_prefix="clickhouse"
clickhouse="$clickhouse_download_filename_prefix"
i=0
while [ -f "$clickhouse" ]
do
clickhouse="${clickhouse_download_filename_prefix}.${i}"
i=$(($i+1))
done
if [ -f "$clickhouse" ]
then
read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer
if [ "$answer" = "y" -o "$answer" = "Y" ]
then
rm -f "$clickhouse"
else
i=0
while [ -f "$clickhouse" ]
do
clickhouse="${clickhouse_download_filename_prefix}.${i}"
i=$(($i+1))
done
fi
fi
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo

View File

@ -39,12 +39,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director
Tests should only use (create, drop, etc.) tables in the `test` database, which is assumed to be created beforehand; tests can also use temporary tables.
### Restricting test runs
A test can have zero or more _test tags_ specifying restrictions for test runs.
For `.sh` tests tags are written as a comment on the second line:
```bash
#!/usr/bin/env bash
# Tags: no-fasttest
```
For `.sql` tests tags are placed in the first line as a SQL comment:
```sql
-- Tags: no-fasttest
SELECT 1
```
|Tag name | What it does | Usage example |
|---|---|---|
| `disabled`| Test is not run ||
| `long` | Test's execution time is extended from 1 to 10 minutes ||
| `deadlock` | Test is run in a loop for a long time ||
| `race` | Same as `deadlock`. Prefer `deadlock` ||
| `shard` | Server is required to listen to `127.0.0.*` ||
| `distributed` | Same as `shard`. Prefer `shard` ||
| `global` | Same as `shard`. Prefer `shard` ||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
| `no-replicated-database` |||
| `no-ordinary-database` |||
| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken|
| `no-parallel-replicas` |||
| `no-debug` |||
| `no-stress` |||
| `no-polymorphic-parts` |||
| `no-random-settings` |||
| `no-random-merge-tree-settings` |||
| `no-backward-compatibility-check` |||
| `no-cpu-x86_64` |||
| `no-cpu-aarch64` |||
| `no-cpu-ppc64le` |||
| `no-s3-storage` |||
In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
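A hypothetical first line for such a `.sql` test, combining this tag with the fast-test restriction described above:
```sql
-- Tags: use-mysql, no-fasttest
SELECT 1
```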
### Choosing the Test Name
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory and increment it by one. Other tests may meanwhile be added with the same numeric prefix; this is OK, does not lead to any problems, and you don't have to change it later.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that use ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using the `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
### Checking for an Error that Must Occur
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:
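The annotation itself falls outside this hunk; as a sketch of the convention, a stateless test appends the expected error to the query in a trailing comment (error code chosen for illustration):
```sql
SELECT * FROM table_that_does_not_exist; -- { serverError 60 }
```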

View File

@ -26,6 +26,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn't don
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type); see the sketch after this list:
- `readonly=0`: No read-only restrictions.
- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set.
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.
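A minimal sketch of the difference between the modes (session-level `SET`; exact error messages may vary):
```sql
SET readonly = 2;
SET max_threads = 4; -- allowed: readonly = 2 permits changing other settings
SET readonly = 0;    -- rejected: the readonly setting itself cannot be changed
```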

View File

@ -48,7 +48,35 @@ When dividing by zero you get inf, -inf, or nan.
## intDiv(a, b)
Calculates the quotient of the numbers. Divides into integers, rounding down by absolute value (i.e. truncating toward zero).
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
Returns an integer of the type of the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
**Example**
Query:
```sql
SELECT
intDiv(toFloat64(1), 0.001) AS res,
toTypeName(res)
```
```response
┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
│ 1000 │ Int64                                    │
└──────┴─────────────────────────────────────────┘
```
```sql
SELECT
intDiv(1, 0.001) AS res,
toTypeName(res)
```
```response
Received exception from server (version 23.2.1):
Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
```
## intDivOrZero(a, b)

View File

@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
## toYYYYMM
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM).
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMM(now(), 'US/Eastern')
```
```response
┌─toYYYYMM(now(), 'US/Eastern')─┐
│                        202303 │
└───────────────────────────────┘
```
## toYYYYMMDD
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD).
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMMDD(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
│                        20230302 │
└─────────────────────────────────┘
```
## toYYYYMMDDhhmmss
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss).
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMMDDhhmmss(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
│                        20230302112209 │
└───────────────────────────────────────┘
```
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
@ -1231,8 +1264,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %e | day of the month, space-padded (1-31) | &nbsp; 2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %h | hour in 12h format (01-12) | 09 |
| %H | hour in 24h format (00-23) | 22 |
| %i | minute (00-59) | 33 |

View File

@ -579,3 +579,33 @@ Result:
│       3628800 │
└───────────────┘
```
## width_bucket(operand, low, high, count)
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
`operand`, `low`, `high` can be any native number type. `count` can only be an unsigned native integer, and its value cannot be zero.
**Syntax**
```sql
widthBucket(operand, low, high, count)
```
There is also a case-insensitive alias `WIDTH_BUCKET` to provide compatibility with other databases.
**Example**
Query:
``` sql
SELECT widthBucket(10.15, -8.6, 23, 18);
```
Result:
``` text
┌─widthBucket(10.15, -8.6, 23, 18)─┐
│                               11 │
└──────────────────────────────────┘
```
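The boundary behavior described above can be checked directly; a small sketch:
``` sql
SELECT
    widthBucket(-10, -8.6, 23, 18) AS below_low,  -- operand < low  -> 0
    widthBucket(23, -8.6, 23, 18) AS at_high,     -- operand >= high -> count + 1 = 19
    widthBucket(30, -8.6, 23, 18) AS above_high;  -- also count + 1 = 19
```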

View File

@ -66,6 +66,42 @@ Result:
- [Map(key, value)](../../sql-reference/data-types/map.md) data type
## mapFromArrays
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).
The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
**Syntax**
```sql
mapFromArrays(keys, values)
```
Alias: `MAP_FROM_ARRAYS(keys, values)`
**Arguments**
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
- `values` - Given value array to create a map from.
**Returned value**
- A map whose keys and values are constructed from the key and value arrays
**Example**
Query:
```sql
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
```
```text
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
│ {'a':1,'b':2,'c':3}                       │
└───────────────────────────────────────────┘
```
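The `CAST` equivalence mentioned above can be verified side by side (a sketch):
```sql
SELECT
    mapFromArrays(['aa', 'bb'], [4, 5]) AS via_function,
    CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)') AS via_cast;
```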
## mapAdd
Collects all the keys and sums the corresponding values.
@ -235,7 +271,7 @@ Determines whether the `map` contains the `key` parameter.
mapContains(map, key)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `key` — Key. Type matches the type of keys of `map` parameter.
@ -280,7 +316,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
mapKeys(map)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
@ -323,7 +359,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
mapValues(map)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
@ -362,7 +398,7 @@ Result:
mapContainsKeyLike(map, pattern)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
@ -400,7 +436,7 @@ Result:
mapExtractKeyLike(map, pattern)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
@ -438,7 +474,7 @@ Result:
mapApply(func, map)
```
**Parameters**
**Arguments**
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
@ -478,7 +514,7 @@ Result:
mapFilter(func, map)
```
**Parameters**
**Arguments**
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
@ -520,7 +556,7 @@ Result:
mapUpdate(map1, map2)
```
**Parameters**
**Arguments**
- `map1` [Map](../../sql-reference/data-types/map.md).
- `map2` [Map](../../sql-reference/data-types/map.md).

View File

@ -6,22 +6,23 @@ sidebar_label: Type Conversion
# Type Conversion Functions
## Common Issues of Numeric Conversions
## Common Issues with Data Conversion
When you convert a value from one to another data type, you should remember that if you try to fit a value from a larger data type to a smaller one (for example Int64 to Int32), or convert from one data type to another (for example `String` to `Int`), you could have data loss. Test beforehand.
Be aware of potential data loss if values of a data type are converted to a smaller data type (for example from `Int64` to `Int32`) or between
incompatible data types (for example from `String` to `Int`). Make sure to check carefully whether the result is as expected.
ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
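As a quick illustration of the C++-like semantics (a sketch; out-of-range integers wrap around as in a C++ integral cast):
```sql
SELECT toInt8(200);  -- 200 does not fit into Int8 and wraps to -56
SELECT toUInt8(-1);  -- wraps to 255
```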
## toInt(8\|16\|32\|64\|128\|256)
Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
Converts an input value to a value of the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toInt8(expr)`Results in the `Int8` data type.
- `toInt16(expr)`Results in the `Int16` data type.
- `toInt32(expr)`Results in the `Int32` data type.
- `toInt64(expr)`Results in the `Int64` data type.
- `toInt128(expr)`Results in the `Int128` data type.
- `toInt256(expr)`Results in the `Int256` data type.
- `toInt8(expr)`Converts to a value of data type `Int8`.
- `toInt16(expr)`Converts to a value of data type `Int16`.
- `toInt32(expr)`Converts to a value of data type `Int32`.
- `toInt64(expr)`Converts to a value of data type `Int64`.
- `toInt128(expr)`Converts to a value of data type `Int128`.
- `toInt256(expr)`Converts to a value of data type `Int256`.
**Arguments**
@ -53,7 +54,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrZero
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns 0.
Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.
**Example**
@ -73,7 +74,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrNull
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns NULL.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`.
**Example**
@ -93,7 +94,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrDefault
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value.
**Example**
@ -116,11 +117,11 @@ Result:
Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toUInt8(expr)`Results in the `UInt8` data type.
- `toUInt16(expr)`Results in the `UInt16` data type.
- `toUInt32(expr)`Results in the `UInt32` data type.
- `toUInt64(expr)`Results in the `UInt64` data type.
- `toUInt256(expr)`Results in the `UInt256` data type.
- `toUInt8(expr)`Converts to a value of data type `UInt8`.
- `toUInt16(expr)`Converts to a value of data type `UInt16`.
- `toUInt32(expr)`Converts to a value of data type `UInt32`.
- `toUInt64(expr)`Converts to a value of data type `UInt64`.
- `toUInt256(expr)`Converts to a value of data type `UInt256`.
**Arguments**
@ -128,7 +129,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint
**Returned value**
Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
@ -166,26 +167,30 @@ Result:
## toDate
Converts the argument to `Date` data type.
Converts the argument to the [Date](/docs/en/sql-reference/data-types/date.md) data type.
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime:
```sql
SELECT
now() AS x,
toDate(x)
```
```response
┌───────────────────x─┬─toDate(now())─┐
│ 2022-12-30 13:44:17 │    2022-12-30 │
└─────────────────────┴───────────────┘
```
If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used:
If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is used:
```sql
SELECT
toDate('2022-12-30') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐
│ 2022-12-30 │ Date                             │
@ -193,18 +198,20 @@ SELECT
1 row in set. Elapsed: 0.001 sec.
```
```sql
SELECT
toDate('2022-12-30 01:02:03') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐
│ 2022-12-30 │ Date                                      │
└────────────┴───────────────────────────────────────────┘
```
If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone:
If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone:
```sql
SELECT
@ -217,6 +224,7 @@ SELECT
toDate(ts) AS date_Amsterdam_2,
toDate(ts, 'Pacific/Apia') AS date_Samoa_2
```
```response
Row 1:
──────
@ -232,7 +240,7 @@ date_Samoa_2: 2022-12-31
The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones.
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example:
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example:
```sql
SELECT toDate(12345)
@ -270,8 +278,6 @@ SELECT
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
```
Have a nice day working with dates and times.
## toDateOrZero
## toDateOrNull
@ -288,7 +294,7 @@ Have a nice day working with dates and times.
## toDate32
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account.
**Syntax**
@ -302,9 +308,7 @@ toDate32(expr)
**Returned value**
- A calendar date.
Type: [Date32](/docs/en/sql-reference/data-types/date32.md).
- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md).
**Example**
@ -332,7 +336,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value);
└────────────┴────────────────────────────────────┘
```
3. With `Date`-type argument:
3. With [Date](/docs/en/sql-reference/data-types/date.md) argument:
``` sql
SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
@ -386,7 +390,7 @@ Result:
## toDate32OrDefault
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account. Returns the default value if an invalid argument is received.
**Example**
@ -666,7 +670,7 @@ YYYY-MM-DD
YYYY-MM-DD hh:mm:ss
```
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing toDate(unix_timestamp), which otherwise would be an error and would require writing the more cumbersome toDate(toDateTime(unix_timestamp)).
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing `toDate(unix_timestamp)`, which otherwise would be an error and would require writing the more cumbersome `toDate(toDateTime(unix_timestamp))`.
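A sketch of this shortcut (the resulting date depends on the server timezone):
```sql
SELECT toDate(1234567890);             -- >= 65536, so interpreted as a Unix timestamp
SELECT toDate(toDateTime(1234567890)); -- the equivalent, more verbose form
```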
Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time.
@ -696,7 +700,7 @@ Also see the `toUnixTimestamp` function.
## toFixedString(s, N)
Converts a String type argument to a FixedString(N) type (a string with fixed length N). N must be a constant.
Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N).
If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown.
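A minimal sketch of both behaviors:
```sql
SELECT toFixedString('abc', 5) AS s, length(s); -- padded with null bytes to length 5
SELECT toFixedString('abcdef', 5);              -- more bytes than N: throws an exception
```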
## toStringCutToZero(s)
@ -914,7 +918,7 @@ Result:
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
```
Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported.
@ -1174,7 +1178,7 @@ For all of the formats with separator the function parses months names expressed
**Returned value**
- `time_string` converted to the `DateTime` data type.
- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Examples**
@ -1254,10 +1258,10 @@ Result:
**See Also**
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
## parseDateTimeBestEffortUS

View File

@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n
## Example
Create a user:
```sql
CREATE USER robin IDENTIFIED BY 'password';
```
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
``` sql
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
CREATE
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
TO robin
```

View File

@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2],
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
...
) ENGINE = engine
COMMENT 'comment for table'
```
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below).
If necessary, a primary key can be specified, with one or more key expressions.
Comments can be added for columns and for the table.
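For instance, a minimal sketch with both kinds of comments (engine and names chosen for illustration):
``` sql
CREATE TABLE t
(
    id UInt64 COMMENT 'primary identifier',
    name String COMMENT 'display name'
)
ENGINE = MergeTree
ORDER BY id
COMMENT 'comment for table'
```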
### With a Schema Similar to Other Table
``` sql

View File

@ -24,6 +24,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -301,7 +301,7 @@ ClickHouse поддерживает временные таблицы со сл
- Temporary tables disappear when the session ends, including when the connection is dropped.
- A temporary table uses only the Memory engine (see the sketch after this list).
- It is impossible to specify a database for a temporary table: it is created outside of databases.
- It is impossible to create a temporary table with a distributed DDL query on all cluster servers (using `ON CLUSTER`): such a table exists only within the current session.
- If a temporary table has the same name as another table and a query references the table name without specifying the database, the temporary table is used.
- For distributed query processing, the temporary tables used in a query are passed to remote servers.
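A minimal sketch consistent with the rules above (hypothetical table name; no database qualifier, Memory engine):
```sql
CREATE TEMPORARY TABLE tmp_visits (id UInt64, ts DateTime) ENGINE = Memory;
```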
@ -344,7 +344,9 @@ REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
### Syntax
```sql
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
```
Any of the `CREATE` query syntax variants can be used for this query. A `REPLACE` query for a non-existent table causes an error.

View File

@ -108,7 +108,7 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
## WITH CUBE Modifier {#with-cube-modifier}
The `WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list.
The subtotal rows are added to the end of the result table. In the columns used for grouping, they contain the value `0` or an empty string.

View File

@ -16,7 +16,7 @@ FROM <left_table>
(ON <expr_list>)|(USING <column_list>) ...
```
Expressions from the `ON` clause and columns from the `USING` clause are called "join keys". Unless stated otherwise, a join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching join-key values, which can produce results with many more rows than the source tables.
## Supported Join Types {#select-join-types}
@ -28,7 +28,7 @@ FROM <left_table>
- `FULL OUTER JOIN`: non-matching rows from both tables are returned in addition to matching rows.
- `CROSS JOIN`: produces the Cartesian product of the whole tables; join keys are not specified.
If no `JOIN` type is specified, `INNER` is implied. The `OUTER` keyword can be omitted. An alternative syntax for `CROSS JOIN` is specifying multiple tables separated by commas in the [FROM clause](from.md).
Additional join types available in ClickHouse:
@ -62,7 +62,7 @@ FROM <left_table>
Rows are joined only if the whole compound condition is satisfied. If it is not satisfied, rows may still be included in the result depending on the `JOIN` type. Note that if the same condition is placed in the `WHERE` clause, rows for which it does not hold never make it into the result.
The `OR` operator inside the `ON` clause works using the hash-join algorithm: for each `OR` argument with join keys a separate hash table is created, so memory consumption and query execution time grow linearly with the number of `OR` expressions in the `ON` clause.
:::note "Note"
If the condition uses columns from different tables, only the equality operator (`=`) is supported so far.
@ -280,7 +280,7 @@ SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b);
Each time a query is run with the same `JOIN`, the subquery is executed again; the result is not cached. This can be avoided by using the special [Join](../../../engines/table-engines/special/join.md) table engine, which is a prepared set for joining that is always kept in RAM.
In some cases it is more efficient to use [IN](../../operators/in.md) instead of `JOIN`.
If you need a `JOIN` for joining with dimension tables (relatively small tables that contain dimension properties, such as names of advertising campaigns), a `JOIN` may not be very convenient due to the bulky syntax and the fact that the right table is re-read for every query. For such cases there is an "external dictionaries" feature, which should be used instead of `JOIN`. For more information, see the "External dictionaries" section.

View File

@ -67,7 +67,7 @@ sidebar_label: ORDER BY
## Collation Examples {#collation-examples}
Example with [String](../../../sql-reference/data-types/string.md) values:
Input table:
@ -241,13 +241,13 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
└───┴─────────┘
```
## Implementation Details {#implementation-details}
Less RAM is used if a small enough [LIMIT](limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data to sort. For distributed query processing, if [GROUP BY](group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can exceed the amount of memory on a single server.
If there is not enough RAM, it is possible to perform sorting in external memory (creating temporary files on disk). Use the `max_bytes_before_external_sort` setting for this purpose. If it is set to 0 (the default), external sorting is disabled. If it is enabled, then when the volume of data to sort reaches the specified number of bytes, the collected data is sorted and dumped into a temporary file. After all data is read, all the sorted files are merged and the result is output. Files are written to the `/var/lib/clickhouse/tmp/` directory (by default; this can be changed with the `tmp_path` config parameter).
Running a query may use more memory than `max_bytes_before_external_sort`, so this setting should be significantly smaller than `max_memory_usage`. For example, if your server has 128 GB of RAM and you need to run a single query, set `max_memory_usage` to 100 GB and `max_bytes_before_external_sort` to 80 GB.
External sorting works much less effectively than sorting in RAM.
@ -366,9 +366,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```
The `d1` field is not filled and uses the default value, because there are no repeated values for `d2` and the fill sequence for `d1` cannot be calculated correctly.
The following query (with the order changed in ORDER BY):
```sql
SELECT
toDate((number * 10) * 86400) AS d1,

View File

@ -13,7 +13,7 @@ Prewhere — это оптимизация для более эффективн
It makes sense to use `PREWHERE` if there are filter conditions that use a minority of the columns in the query but filter the data strongly. This reduces the volume of data to read.
`PREWHERE` and `WHERE` may both be specified in one query. In this case `PREWHERE` precedes `WHERE`.
If the [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) setting is set to 0, the heuristic that automatically moves parts of expressions from `WHERE` to `PREWHERE` is disabled.
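A sketch with a hypothetical table, putting the cheap, highly selective condition into `PREWHERE` and the rest into `WHERE`:
```sql
SELECT count()
FROM hits
PREWHERE CounterID = 62
WHERE EventDate >= '2013-07-01';
```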

View File

@ -10,7 +10,7 @@ sidebar_label: SAMPLE
Sampling makes sense when:
1. Result accuracy is not important, for example, for rough estimates.
2. Hardware capabilities do not allow meeting strict criteria. For example, the response time must be &lt;100 ms, while calculation accuracy has a lower priority.
3. Result accuracy is part of the service's business model. For example, users with a free subscription can get reports with lower accuracy than users with a premium subscription.
:::note "Attention"

View File

@ -26,7 +26,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c
Result columns are matched by their index (order inside `SELECT`). If the column names do not match, names for the final result are taken from the first query.
Type casting is performed during unions. For example, if two queries being combined have the same field with compatible non-`Nullable` and `Nullable` types, the resulting `UNION` data has a `Nullable` type.
Queries that are parts of a `UNION` can be enclosed in round brackets. [ORDER BY](order-by.md) and [LIMIT](limit.md) are applied to the individual queries, not to the final result. If you need to apply a transformation to the final result, you can put all the queries combined with `UNION` in a subquery in the [FROM](from.md) clause.
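A sketch of applying `ORDER BY` to the combined result by wrapping the `UNION` queries in a subquery:
```sql
SELECT x
FROM
(
    SELECT 1 AS x
    UNION ALL
    SELECT 2
)
ORDER BY x DESC;
```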

View File

@ -5,7 +5,7 @@ sidebar_label: WITH
# WITH Clause {#with-clause}
ClickHouse supports [common table expressions](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), that is, it lets you use results of expressions from the `WITH` clause in the rest of the `SELECT` query. Named subqueries can be included in the current and child query contexts wherever table objects are allowed. Recursion is prevented by hiding the current-level common table expression from the `WITH` expression.
## Syntax

View File

@ -19,7 +19,6 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
- `shard_name` — The name of the shard. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — The name of the replica. Replica names must be different for all replicas of the same shard.
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, if no arguments are provided, the default arguments `/clickhouse/tables/{uuid}/{shard}` and `{replica}` are used. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). The macro `{uuid}` is expanded to the table's uuid, while `{shard}` and `{replica}` are expanded to values from the server config rather than from the database engine arguments. In the future it will be possible to use the `shard_name` and `replica_name` of the Replicated database.
## Usage {#specifics-and-recommendations}
@ -52,8 +51,8 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
@ -62,13 +61,13 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
Display the system table:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
@ -83,9 +82,9 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
@ -98,8 +97,8 @@ node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
The cluster configuration looks like this:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
@ -113,8 +112,8 @@ node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -1,6 +1,6 @@
---
slug: /zh/engines/table-engines/mergetree-family/mergetree
---
---
slug: /zh/engines/table-engines/mergetree-family/mergetree
---
# MergeTree {#table_engines-mergetree}
The most powerful table engine in ClickHouse is `MergeTree`, together with the other engines of the `*MergeTree` family.
@ -25,8 +25,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
If necessary, you can set a sampling method for the table.
:::info
The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
:::
## Creating a Table {#table_engine-mergetree-creating-a-table}
@ -364,7 +365,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
Functions with a constant argument smaller than the ngram size cannot be used by `ngrambf_v1` for query optimization.
:::note
Bloom filters can have false-positive matches, so the `ngrambf_v1`, `tokenbf_v1` and `bloom_filter` indexes cannot be used for optimizing functions whose result is expected to be false, for example:
- Can be optimized:
@ -379,6 +380,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
- `NOT s = 1`
- `s != 1`
- `NOT startsWith(s, 'test')`
:::
## Concurrent Data Access {#concurrent-data-access}

View File

@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- The [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for examples
**Distributed settings**
- `fsync_after_insert` - Do `fsync` for the file data after asynchronous inserts into Distributed. Guarantees that the OS flushed all inserted data to a file **on the disk of the initiator node**.
@ -66,19 +66,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- `monitor_max_sleep_time_ms` - Same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms)
!!! note "备注"
::note
**稳定性设置** (`fsync_...`):
**稳定性设置** (`fsync_...`):
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上然后再异步发送到shard。
— 可能会显著降低`insert`的性能
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上然后再异步发送到shard。
— 可能会显著降低`insert`的性能
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
**插入限制设置** (`..._insert`) 请见:
**插入限制设置** (`..._insert`) 请见:
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
- `bytes_to_throw_insert``bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
:::
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
- `bytes_to_throw_insert``bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
**示例**
``` sql
@ -214,7 +215,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
## Reading Data {#distributed-reading-data}
When querying a `Distributed` table, `SELECT` queries are sent to all shards, no matter how the data is distributed across them (it can be distributed completely randomly). When you add a new shard, you do not have to transfer old data to it; instead, you can write new data to it with a heavier weight. The data will be distributed slightly unevenly, but queries will work correctly and efficiently.
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
@ -225,8 +226,9 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
- `_shard_num` — 表`system.clusters` 中的 `shard_num` 值 . 数据类型: [UInt32](../../../sql-reference/data-types/int-uint.md).
!!! note "备注"
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
:::note
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
:::
**详见**
- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) 描述

View File

@ -617,8 +617,9 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
[ZooKeeper](http://zookeeper.apache.org/) is required to enable native replication. ClickHouse takes care of data consistency on all replicas and runs the restore procedure after a failure automatically. It is recommended to deploy the ZooKeeper cluster on separate servers that do not run any other processes, including ClickHouse.
!!! note "Note"
ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it to all replicas from your application code. This approach is **not** recommended; in this case, ClickHouse cannot guarantee data consistency on all replicas, so it becomes the responsibility of your application.
:::note
ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it to all replicas from your application code. This approach is **not** recommended; in this case, ClickHouse cannot guarantee data consistency on all replicas, so it becomes the responsibility of your application.
:::
ZooKeeper locations are specified in the configuration file:

View File

@ -685,8 +685,9 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 0`, the default values for `x` and `a` both equal `0` (as the default value for the `UInt32` data type).
- If `input_format_defaults_for_omitted_fields = 1`, the default value for `x` equals `0`, but the default value for `a` equals `x * 2`.
!!! note "Note"
:::warning
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computing resources than with `input_format_defaults_for_omitted_fields = 0`.
:::
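For illustration, a minimal sketch of this behaviour, assuming `example_table` is the table from this section with columns `x UInt32` and `a DEFAULT x * 2`:

``` sql
SET input_format_defaults_for_omitted_fields = 1;
-- The field "a" is omitted, so it is computed from its DEFAULT expression.
INSERT INTO example_table FORMAT JSONEachRow {"x": 3}

SELECT * FROM example_table; -- returns x = 3, a = 6
```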
### Selecting Data {#selecting-data}
@ -708,8 +709,9 @@ CREATE TABLE IF NOT EXISTS example_table
Unlike the [JSON](#json) format, there is no replacing of invalid UTF-8 sequences. Values are escaped the same way as for `JSON`.
!!! note "Tip"
:::info
Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information.
:::
### Nested Structures {#jsoneachrow-nested}
@ -1216,9 +1218,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry';
SELECT * FROM topic1_stream;
```
!!! note "Warning"
The `format_avro_schema_registry_url` setting needs to be written to `users.xml` to keep its value after a ClickHouse restart. You can also specify this setting when using the Kafka engine.
:::warning
The `format_avro_schema_registry_url` setting needs to be written to `users.xml` to keep its value after a ClickHouse restart. You can also specify this setting when using the Kafka engine.
:::
## Parquet {#data-format-parquet}

View File

@ -188,8 +188,9 @@ $ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
!!! note "Warning"
Some HTTP clients may decompress data (`gzip` and `deflate`) from the server by default, so you may get decompressed data even if you use the compression settings correctly.
:::warning
Some HTTP clients may decompress data (`gzip` and `deflate`) from the server by default, so you may get decompressed data even if you use the compression settings correctly.
:::
You can use the `database` URL parameter or the `X-ClickHouse-Database` header to specify the default database.
@ -447,8 +448,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:
max_final_threads 2
```
!!! note "Warning"
Within one `predefined_query_handler`, only one `query` of insert type is supported.
:::warning
Within one `predefined_query_handler`, only one `query` of insert type is supported.
:::
### Dynamic Query {#dynamic_query_handler}

View File

@ -24,6 +24,7 @@ Yandex does **not** maintain the libraries listed below and has not done any extensive testing
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -13,5 +13,6 @@ sidebar_position: 24
- [GUI](../../interfaces/third-party/gui.md)
- [Proxies](../../interfaces/third-party/proxy.md)
!!! note "Note"
:::note
Generic tools that support a common API, such as [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md), usually work with ClickHouse as well, but they are not listed here because there are far too many of them.
:::

View File

@ -24,9 +24,9 @@ ClickHouse access entities include:
We recommend using the SQL-driven workflow. Both configuration methods work simultaneously, so if you manage accounts and access rights via server configuration, you can smoothly switch to the SQL-driven workflow.
!!! note "Warning"
You cannot manage the same access entity with both configuration methods at the same time.
:::warning
You cannot manage the same access entity with both configuration methods at the same time.
:::
## Usage {#access-control-usage}

View File

@ -12,8 +12,9 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"
Different companies have different available resources and business requirements, so there is no universal solution for ClickHouse backup and restore that fits every situation. What works for one gigabyte of data likely will not work for tens of petabytes. There is a variety of possible approaches with their own pros and cons, which are discussed below. It is a good idea to use several approaches instead of just one in order to compensate for their various shortcomings.
!!! note "Note"
Keep in mind that if you backed something up and never tried to restore it, chances are that the restore will not work properly when you actually need it (or at least it will take longer than the business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
:::note
Keep in mind that if you backed something up and never tried to restore it, chances are that the restore will not work properly when you actually need it (or at least it will take longer than the business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
:::
## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}

View File

@ -528,8 +528,9 @@ SSL client/server configuration.
The path to the directory containing data.
!!! note "Note"
The trailing slash is mandatory.
:::note
The trailing slash is mandatory.
:::
**Example**
@ -714,8 +715,9 @@ TCP port for secure communication with clients. Use it with [OpenSSL](#serv
The path to temporary data for processing large queries.
!!! note "Note"
The trailing slash is mandatory.
:::note
The trailing slash is mandatory.
:::
**Example**
@ -728,11 +730,12 @@ TCP port for secure communication with clients. Use it with [OpenSSL](#serv
A policy from [`storage_configuration`](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for storing temporary files.
If not set, [`tmp_path`](#server-settings-tmp_path) is used; otherwise it is ignored.
!!! note "Note"
- `move_factor` is ignored
:::note
- `move_factor` is ignored
- `keep_free_space_bytes` is ignored
- `max_data_part_size_bytes` is ignored
- you must have exactly one volume in that policy
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}

View File

@ -8,8 +8,9 @@ sidebar_label: "\u8BBE\u7F6E\u914D\u7F6E"
A settings profile is a collection of settings grouped under the same name.
!!! note "Info"
ClickHouse also supports managing settings profiles with an [SQL-driven workflow](../../operations/access-rights.md#access-control). We recommend using it.
:::info
ClickHouse also supports managing settings profiles with an [SQL-driven workflow](../../operations/access-rights.md#access-control). We recommend using it.
:::
A settings profile can have any name. You can specify the same profile for different users. The most important thing you can write in a settings profile is `readonly=1`, which ensures read-only access.

View File

@ -10,8 +10,9 @@ sidebar_label: "\u7528\u6237\u8BBE\u7F6E"
The `users` section of `user.xml` contains user settings.
!!! note "Tip"
ClickHouse also supports an [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it.
:::note
ClickHouse also supports an [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it.
:::
Structure of the `users` section:

View File

@ -266,8 +266,9 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
When performing `INSERT` queries, replace omitted input column values with the default values of the respective columns. This option only applies to the [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv), and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
!!! note "Note"
When this option is enabled, extended table metadata is sent from the server to the client. It consumes additional computing resources on the server and can reduce performance.
:::note
When this option is enabled, extended table metadata is sent from the server to the client. It consumes additional computing resources on the server and can reduce performance.
:::
Possible values:

View File

@ -99,8 +99,9 @@ slug: /zh/operations/system-tables/parts
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — an array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
!!! note "Warning"
The `move_ttl_info.expression` array is kept mostly for backward compatibility; the easiest way to check a `TTL MOVE` rule now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::warning
The `move_ttl_info.expression` array is kept mostly for backward compatibility; the easiest way to check a `TTL MOVE` rule now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — an array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
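For example, a hedged sketch of inspecting these fields (the table name `visits` is hypothetical):

``` sql
SELECT name, move_ttl_info.expression, move_ttl_info.min, move_ttl_info.max
FROM system.parts
WHERE table = 'visits' AND active;
```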

View File

@ -8,8 +8,9 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
Contains information about executed queries, for example: start time, processing duration, error messages.
!!! note "Note"
This table does not contain the data ingested by `INSERT` queries.
:::note
This table does not contain the data ingested by `INSERT` queries.
:::
You can change the settings of query_log in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.

View File

@ -12,5 +12,6 @@ sidebar_position: 107
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works more slowly but provides a lower computational error.
:::
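A small illustrative example of both variants; the `values` table function and the numbers are used here purely for demonstration:

``` sql
SELECT corr(x, y), corrStable(x, y)
FROM values('x Float64, y Float64', (1, 2.1), (2, 3.9), (3, 6.0));
```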

View File

@ -12,5 +12,6 @@ covarPop(x, y)
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works more slowly but provides a lower computational error.
:::
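An illustrative sketch, again using the `values` table function purely for demonstration:

``` sql
SELECT covarPop(x, y), covarPopStable(x, y)
FROM values('x Float64, y Float64', (1, 10), (2, 12), (3, 17));
```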

View File

@ -14,5 +14,6 @@ covarSamp(x, y)
Returns Float64. When `n <= 1`, returns +∞.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works more slowly but provides a lower computational error.
:::

View File

@ -37,8 +37,9 @@ quantileTiming(level)(expr)
Otherwise, the result of the calculation is rounded to the nearest multiple of 16 milliseconds.
!!! note "Note"
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::note
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::
**Returned value**
@ -46,8 +47,9 @@ quantileTiming(level)(expr)
Type: `Float32`.
!!! note "Note"
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::note
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::
**Example**

View File

@ -39,8 +39,9 @@ quantileTimingWeighted(level)(expr, weight)
Otherwise, the result of the calculation is rounded to the nearest multiple of 16 milliseconds.
!!! note "Note"
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::note
For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
:::
**Returned value**
@ -48,8 +49,9 @@ quantileTimingWeighted(level)(expr, weight)
Type: `Float32`.
!!! note "Note"
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::note
If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
:::
**Example**

View File

@ -7,5 +7,6 @@ sidebar_position: 30
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works more slowly but provides a lower computational error.
:::

View File

@ -7,5 +7,6 @@ sidebar_position: 31
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works more slowly but provides a lower computational error.
:::

View File

@ -36,8 +36,9 @@ uniqCombined(HLL_precision)(x[, ...])
- Provides the result deterministically (it does not depend on the order of query processing).
!!! note "Note"
Since it uses a 32-bit hash for non-`String` types, the result will have a very high error for cardinalities significantly larger than `UINT_MAX` (the error ramps up quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
:::note
Since it uses a 32-bit hash for non-`String` types, the result will have a very high error for cardinalities significantly larger than `UINT_MAX` (the error ramps up quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, `uniqCombined`:

View File

@ -9,5 +9,6 @@ sidebar_position: 32
In other words, it computes the dispersion for a set of values. Returns `Float64`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works more slowly but provides a lower computational error.
:::

View File

@ -11,5 +11,6 @@ sidebar_position: 33
Returns `Float64`. When `n <= 1`, returns `+∞`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works more slowly but provides a lower computational error.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works more slowly but provides a lower computational error.
:::

View File

@ -6,8 +6,9 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027"
# ANSI SQL Compatibility of ClickHouse SQL Dialect {#ansi-sql-compatibility-of-clickhouse-sql-dialect}
!!! note "Note"
This article relies on Annex G of the [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8) standard.
:::note
This article relies on Annex G of the [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8) standard.
:::
## Differences in Behaviour {#differences-in-behaviour}

View File

@ -25,10 +25,10 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
!!! note "Note"
Values of `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with the `-Merge`/`-State` suffixes.
`SimpleAggregateFunction` performs better than `AggregateFunction` with the same aggregate function.
:::note
Values of `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with the `-Merge`/`-State` suffixes.
`SimpleAggregateFunction` performs better than `AggregateFunction` with the same aggregate function.
:::
**Parameters**

View File

@ -42,8 +42,9 @@ slug: /zh/sql-reference/functions/string-search-functions
For a case-insensitive search and/or a search in UTF-8, use the functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.
!!! note "Note"
In all `multiSearch*` functions, the number of needles must be less than 2<sup>8</sup> because of the implementation specification.
:::note
In all `multiSearch*` functions, the number of needles must be less than 2<sup>8</sup> because of the implementation specification.
:::
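A short illustrative example (the string literals are arbitrary):

``` sql
SELECT
    multiSearchAny('ClickHouse', ['House', 'DB']) AS any_match,         -- 1: 'House' is found
    multiSearchAnyCaseInsensitive('ClickHouse', ['click']) AS ci_match; -- 1: case-insensitive hit
```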
## match(haystack, pattern) {#matchhaystack-pattern}
@ -60,8 +61,9 @@ slug: /zh/sql-reference/functions/string-search-functions
Same as `match`, but returns 0 if none of the regular expressions match and 1 if any of the patterns match. It uses the [hyperscan](https://github.com/intel/hyperscan) library. For patterns that search for substrings in a string, it is better to use `multiSearchAny`, since it works much more efficiently.
!!! note "Note"
The length of any `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown. This restriction comes from the hyperscan API.
:::note
The length of any `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown. This restriction comes from the hyperscan API.
:::
## multiMatchAnyIndex(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
@ -75,11 +77,13 @@ slug: /zh/sql-reference/functions/string-search-functions
Same as `multiFuzzyMatchAny`, but returns the index of the pattern that matches.
!!! note "Note"
The `multiFuzzyMatch*` functions do not support UTF-8 regular expressions; due to hyperscan restrictions, such expressions are processed as bytes.
:::note
The `multiFuzzyMatch*` functions do not support UTF-8 regular expressions; due to hyperscan restrictions, such expressions are processed as bytes.
:::
!!! note "Note"
To turn off the use of all hyperscan functions, apply the setting `SET allow_hyperscan = 0;`.
:::note
To turn off the use of all hyperscan functions, apply the setting `SET allow_hyperscan = 0;`.
:::
## extract(haystack, pattern) {#extracthaystack-pattern}
@ -119,5 +123,6 @@ slug: /zh/sql-reference/functions/string-search-functions
For a case-insensitive search and/or a search in UTF-8, use the functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
!!! note "Note"
For UTF-8, we use 3-grams. None of these are perfectly fair n-gram distances. We use 2-byte hashes to hash the n-grams and then calculate the symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format, we do not use a fair `tolower` function: we zero the 5th bit (counting from zero) of each code point byte and the first bit of the byte, which works for Latin and mostly for all Cyrillic letters.
:::note
For UTF-8, we use 3-grams. None of these are perfectly fair n-gram distances. We use 2-byte hashes to hash the n-grams and then calculate the symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format, we do not use a fair `tolower` function: we zero the 5th bit (counting from zero) of each code point byte and the first bit of the byte, which works for Latin and mostly for all Cyrillic letters.
:::

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
Deletes data matching the specified filter expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::
`filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.
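A hedged sketch of the statement (the table and column names are hypothetical):

``` sql
-- Deletes every row for which the filter expression is non-zero.
ALTER TABLE visits DELETE WHERE duration = 0;
```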

View File

@ -17,8 +17,9 @@ sidebar_label: ALTER
- [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md)
- [TTL](../../../sql-reference/statements/alter/ttl.md)
!!! note "Note"
Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md).
:::note
Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md).
:::
These `ALTER` statements manipulate views:

View File

@ -14,5 +14,6 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY ORDER BY new_expression
The command is lightweight in the sense that it only changes metadata. To keep the property that data part rows are ordered by the sorting key expression, you cannot add expressions containing existing columns to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query, without a default column value).
!!! note "Note"
It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables).
:::note
It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables).
:::

View File

@ -14,8 +14,9 @@ sidebar_label: SETTING
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```
!!! note "Note"
These queries can only be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables.
:::note
These queries can only be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables.
:::
## Modify Setting {#alter_modify_setting}

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
Manipulates data matching the specified filter expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems that support SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.
:::
`filter_expr` must be of type `UInt8`. This query updates the values of the specified columns to the values of the corresponding expressions in rows for which `filter_expr` takes a non-zero value. Values are cast to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary key or the partition key is not supported.
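A hedged sketch of the statement (the table and column names are hypothetical):

``` sql
-- Increments a counter only in the rows selected by the filter expression.
ALTER TABLE visits UPDATE duration = duration + 1 WHERE user_id = 42;
```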

View File

@ -9,8 +9,9 @@ sidebar_label: EXCHANGE
Atomically exchanges the names of two tables or dictionaries.
This task can also be accomplished with a [RENAME](./rename.md) query, but in that case the operation is not atomic.
!!! note "Note"
:::note
`EXCHANGE` is only supported by the [Atomic](../../engines/database-engines/atomic.md) database engine.
:::
**Syntax**

View File

@ -9,8 +9,9 @@ sidebar_label: RENAME
Renames databases, tables, or dictionaries. Several entities can be renamed in a single query.
Note that a `RENAME` query with multiple entities is a non-atomic operation. To exchange entity names atomically, use the [EXCHANGE](./exchange.md) statement.
!!! note "Note"
:::note
`RENAME` is only supported by the [Atomic](../../engines/database-engines/atomic.md) database engine.
:::
**Syntax**

View File

@ -11,8 +11,9 @@ sidebar_label: GROUP BY
- All expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- The result of aggregating a `SELECT` query will contain as many rows as there were unique values of the "grouping key" in the source table. This usually reduces the row count significantly, often by orders of magnitude, but not necessarily: the row count stays the same if all "grouping key" values were distinct.
!!! note "Note"
There is an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row.
:::note
There is an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row.
:::
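An illustrative sketch of the omitted `GROUP BY` case (the table and column names are hypothetical):

``` sql
-- All table columns appear only inside aggregate functions,
-- so GROUP BY can be omitted and exactly one row is returned.
SELECT count(), sum(duration) FROM visits;
```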
## NULL Processing {#null-processing}

View File

@ -39,8 +39,9 @@ Other join types available in ClickHouse:
## Strictness {#join-settings}
!!! note "Note"
The default strictness value can be overridden using the [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
:::note
The default strictness value can be overridden using the [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
:::
Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
@ -91,8 +92,9 @@ USING (equi_column1, ... equi_columnN, asof_column)
`ASOF JOIN` takes the timestamp of a user event from `table_1` and finds the event in `table_2` whose timestamp is closest to it, to satisfy the closest-match condition. Equal timestamp values are the closest, if available. Here the `user_id` column can be used for joining on equality, and the `ev_time` column can be used for joining on the closest match. In this example, `event_1_1` can be joined with `event_2_1`, and `event_1_2` can be joined with `event_2_3`, but `event_2_2` cannot be joined.
!!! note "Note"
`ASOF JOIN` is **not** supported in the [JOIN](../../../engines/table-engines/special/join.md) table engine.
:::note
`ASOF JOIN` is **not** supported in the [JOIN](../../../engines/table-engines/special/join.md) table engine.
:::
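A hedged sketch of the `USING` form for the example above, where the last column in `USING` is the closest-match column:

``` sql
SELECT *
FROM table_1 ASOF JOIN table_2 USING (user_id, ev_time);
```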
## Distributed JOIN {#global-join}

View File

@ -14,8 +14,9 @@ ClickHouse supports the following syntax variants:
When processing a query, ClickHouse first selects data ordered by the sorting key. The sorting key is set explicitly with an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by); otherwise, the row order is randomized due to multi-threading). Then ClickHouse executes the `LIMIT n BY expressions` clause, grouping each row by the values of `expressions` and returning the first `n` rows for each group. If `OFFSET` is specified, then for each group ClickHouse skips the first `offset_value` rows and returns the next `n` rows. If `offset_value` is larger than the number of rows in a group, ClickHouse returns zero rows for that group.
!!! note "Note"
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
:::note
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
:::
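A hedged sketch (the table and column names are hypothetical): return at most two pages per domain, taking the most viewed ones first.

``` sql
SELECT domain, page, views
FROM page_views
ORDER BY domain, views DESC
LIMIT 2 BY domain;
```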
## Examples {#examples}

View File

@ -15,8 +15,9 @@ sidebar_label: SAMPLE
- When your raw data is not accurate, so approximation does not noticeably degrade the quality.
- When business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users).
!!! note "Note"
You can only use sampling with tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::note
You can only use sampling with tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::
The features of data sampling are listed below:

View File

@ -11,9 +11,10 @@ sidebar_label: WHERE
If the underlying table engine supports it, the `WHERE` expression is evaluated using indexes and partition pruning.
!!! note "Note"
There is a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md).
:::note
There is a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md).
:::
If you need to test a [NULL](../../../sql-reference/syntax.md#null-literal) value, use the [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators or the [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions. Otherwise, an expression involving NULL will never pass.
**Example**

View File

@ -124,10 +124,9 @@ ClickHouse can manage [MergeTree](../../engines/table-engines/mergetree-family/
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
```
!!! note "Note"
A `DETACH / ATTACH` of a table will start background merges for that table even if merges have previously been stopped for all MergeTree tables.
:::note
A `DETACH / ATTACH` of a table will start background merges for that table even if merges have previously been stopped for all MergeTree tables.
:::
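A hedged sketch (the database and table names are hypothetical):

``` sql
-- Stops background merges for one table; START MERGES (below) re-enables them.
SYSTEM STOP MERGES merge_db.merge_table;
```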
### START MERGES {#query_language-system-start-merges}

View File

@ -49,8 +49,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',
A table object with the same columns as the original MySQL table.
!!! note "Note"
In an `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
:::note
In an `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
:::
## Usage Example {#usage-example}
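As a hedged sketch of the `INSERT` form mentioned in the note (the connection parameters reuse the names from the snippet above and are illustrative only):

``` sql
INSERT INTO TABLE FUNCTION mysql('mysql1:3306', 'mysql_database', 'mysql_table', 'user', 'password')
    (id, name) VALUES (1, 'first');
```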

View File

@ -14,7 +14,8 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=%p # %p is resolved to the systemd unit name
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p

View File

@ -18,7 +18,7 @@ Group=clickhouse
Restart=always
RestartSec=30
# Since ClickHouse is systemd-aware, the default 1m30sec may not be enough
TimeoutStartSec=inifinity
TimeoutStartSec=infinity
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid

View File

@ -158,6 +158,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
if (cloned)
{
writeStringBinary("1", out);
out.finalize();
return;
}
else
{

View File

@ -398,15 +398,30 @@
fill: var(--logo-color);
}
#cloud-logo
{
color: var(--background-color);
text-shadow: 0rem 0rem 2rem var(--logo-color);
font-size: 10vw;
display: block;
}
#logo:hover
{
fill: var(--logo-color-active);
color: var(--logo-color-active);
}
#cloud-logo:hover
{
filter: brightness(150%);
}
#logo-container
{
text-align: center;
margin-top: 5em;
line-height: 0.75;
}
#chart
@ -487,6 +502,7 @@
</g>
</svg>
</a>
<a id="cloud-logo" href="https://clickhouse.cloud/"></a>
</p>
</body>
@ -669,6 +685,33 @@
elem.selectionStart = selection_start + 4;
elem.selectionEnd = selection_start + 4;
e.preventDefault();
return false;
} else if (e.key === 'Enter') {
// If the user presses Enter, and the previous line starts with spaces,
// then we will insert the same number of spaces.
const elem = e.target;
if (elem.selectionStart !== elem.selectionEnd) {
// If there is a selection, then we will not insert spaces.
return;
}
const cursor_pos = elem.selectionStart;
const elem_value = elem.value;
const text_before_cursor = elem_value.substring(0, cursor_pos);
const text_after_cursor = elem_value.substring(cursor_pos);
const prev_lines = text_before_cursor.split('\n');
const prev_line = prev_lines.pop();
const lead_spaces = prev_line.match(/^\s*/)[0];
if (!lead_spaces) {
return;
}
// Add leading spaces to the current line.
elem.value = text_before_cursor + '\n' + lead_spaces + text_after_cursor;
elem.selectionStart = cursor_pos + lead_spaces.length + 1;
elem.selectionEnd = elem.selectionStart;
e.preventDefault();
return false;
}

View File

@ -1,8 +1,8 @@
#include <Access/Common/AllowedClientHosts.h>
#include <Common/Exception.h>
#include <Common/likePatternToRegexp.h>
#include <Common/logger_useful.h>
#include <base/scope_guard.h>
#include <Functions/likePatternToRegexp.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/RegularExpression.h>
#include <boost/algorithm/string/predicate.hpp>

View File

@ -5,8 +5,10 @@
#include <IO/Operators.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Analyzer/Utils.h>
#include <Analyzer/ColumnNode.h>
namespace DB
{
@ -52,8 +54,24 @@ ASTPtr ArrayJoinNode::toASTImpl() const
auto array_join_ast = std::make_shared<ASTArrayJoin>();
array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner;
const auto & join_expression_list_node = getJoinExpressionsNode();
array_join_ast->children.push_back(join_expression_list_node->toAST());
auto array_join_expressions_ast = std::make_shared<ASTExpressionList>();
const auto & array_join_expressions = getJoinExpressions().getNodes();
for (const auto & array_join_expression : array_join_expressions)
{
ASTPtr array_join_expression_ast;
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
array_join_expression_ast = column_node->getExpression()->toAST();
else
array_join_expression_ast = array_join_expression->toAST();
array_join_expression_ast->setAlias(array_join_expression->getAlias());
array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast));
}
array_join_ast->children.push_back(std::move(array_join_expressions_ast));
array_join_ast->expression_list = array_join_ast->children.back();
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();

View File

@ -110,8 +110,15 @@ ASTPtr ColumnNode::toASTImpl() const
}
else if (auto * table_node = column_source->as<TableNode>())
{
const auto & table_storage_id = table_node->getStorageID();
column_identifier_parts = {table_storage_id.getDatabaseName(), table_storage_id.getTableName()};
if (!table_node->getTemporaryTableName().empty())
{
column_identifier_parts = { table_node->getTemporaryTableName() };
}
else
{
const auto & table_storage_id = table_node->getStorageID();
column_identifier_parts = { table_storage_id.getDatabaseName(), table_storage_id.getTableName() };
}
}
}
}

View File

@ -348,7 +348,7 @@ ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
auto replacement_ast = std::make_shared<ASTColumnsReplaceTransformer::Replacement>();
replacement_ast->name = replacements_names[i];
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST());
ast_replace_transformer->children.push_back(replacement_ast);
ast_replace_transformer->children.push_back(std::move(replacement_ast));
}
return ast_replace_transformer;

View File

@ -263,6 +263,13 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back();
nodes_to_clone.pop_back();
auto already_cloned_node_it = old_pointer_to_new_pointer.find(node_to_clone);
if (already_cloned_node_it != old_pointer_to_new_pointer.end())
{
*place_for_cloned_node = already_cloned_node_it->second;
continue;
}
auto it = replacement_map.find(node_to_clone);
auto node_clone = it != replacement_map.end() ? it->second : node_to_clone->cloneImpl();
*place_for_cloned_node = node_clone;

View File

@ -40,15 +40,7 @@ void IdentifierNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_sta
bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const IdentifierNode &>(rhs);
if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
return false;
else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
return false;
else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
return false;
return identifier == rhs_typed.identifier;
return identifier == rhs_typed.identifier && table_expression_modifiers == rhs_typed.table_expression_modifiers;
}
void IdentifierNode::updateTreeHashImpl(HashState & state) const

View File

@ -102,19 +102,21 @@ public:
if (!left_argument_constant_node && !right_argument_constant_node)
return;
/** If we extract negative constant, aggregate function name must be updated.
/** Need reverse max <-> min for:
*
* Example: SELECT min(-1 * id);
* Result: SELECT -1 * max(id);
* max(-1*value) -> -1*min(value)
* max(value/-2) -> min(value)/-2
* max(1-value) -> 1-min(value)
*/
std::string aggregate_function_name_if_constant_is_negative;
if (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide")
auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string
{
if (lower_aggregate_function_name == "min")
aggregate_function_name_if_constant_is_negative = "max";
else if (lower_aggregate_function_name == "max")
aggregate_function_name_if_constant_is_negative = "min";
}
if (aggregate_function_name == "min")
return "max";
else if (aggregate_function_name == "max")
return "min";
else
return aggregate_function_name;
};
size_t arithmetic_function_argument_index = 0;
@ -126,11 +128,11 @@ public:
/// Rewrite `aggregate_function(inner_function(constant, argument))` into `inner_function(constant, aggregate_function(argument))`
const auto & left_argument_constant_value_literal = left_argument_constant_node->getValue();
if (!aggregate_function_name_if_constant_is_negative.empty() &&
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
{
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
}
bool need_reverse = (arithmetic_function_name == "multiply" && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
|| (arithmetic_function_name == "minus");
if (need_reverse)
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
arithmetic_function_argument_index = 1;
}
@ -138,11 +140,10 @@ public:
{
/// Rewrite `aggregate_function(inner_function(argument, constant))` into `inner_function(aggregate_function(argument), constant)`
const auto & right_argument_constant_value_literal = right_argument_constant_node->getValue();
if (!aggregate_function_name_if_constant_is_negative.empty() &&
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
{
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
}
bool need_reverse = (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide") && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal);
if (need_reverse)
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
arithmetic_function_argument_index = 0;
}
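A hedged SQL sketch of the rewrites this pass performs, restating the comment above (a table `t` with a numeric column `value` is assumed):

``` sql
SELECT max(-1 * value) FROM t;  -- rewritten to: SELECT -1 * min(value) FROM t
SELECT max(value / -2) FROM t;  -- rewritten to: SELECT min(value) / -2 FROM t
SELECT max(1 - value)  FROM t;  -- rewritten to: SELECT 1 - min(value)  FROM t
```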

View File

@ -4,12 +4,13 @@
#include <unordered_map>
#include <vector>
#include <Common/likePatternToRegexp.h>
#include <Core/Field.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/likePatternToRegexp.h>
#include <Interpreters/Context.h>

View File

@ -1,5 +1,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <DataTypes/getLeastSupertype.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/JoinNode.h>
@ -152,25 +154,34 @@ public:
QueryTreeNodes other_conditions;
exctractJoinConditions(where_condition, equi_conditions, other_conditions);
bool can_convert_cross_to_inner = false;
for (auto & cond : equi_conditions)
for (auto & condition : equi_conditions)
{
auto left_src = getExpressionSource(getEquiArgument(cond, 0));
auto right_src = getExpressionSource(getEquiArgument(cond, 1));
if (left_src.second && right_src.second && left_src.first && right_src.first)
{
bool can_join_on = (findInTableExpression(left_src.first, left_table) && findInTableExpression(right_src.first, right_table))
|| (findInTableExpression(left_src.first, right_table) && findInTableExpression(right_src.first, left_table));
const auto & lhs_equi_argument = getEquiArgument(condition, 0);
const auto & rhs_equi_argument = getEquiArgument(condition, 1);
if (can_join_on)
DataTypes key_types = {lhs_equi_argument->getResultType(), rhs_equi_argument->getResultType()};
DataTypePtr common_key_type = tryGetLeastSupertype(key_types);
/// If there is common key type, we can join on this condition
if (common_key_type)
{
auto left_src = getExpressionSource(lhs_equi_argument);
auto right_src = getExpressionSource(rhs_equi_argument);
if (left_src.second && right_src.second && left_src.first && right_src.first)
{
can_convert_cross_to_inner = true;
continue;
if ((findInTableExpression(left_src.first, left_table) && findInTableExpression(right_src.first, right_table)) ||
(findInTableExpression(left_src.first, right_table) && findInTableExpression(right_src.first, left_table)))
{
can_convert_cross_to_inner = true;
continue;
}
}
}
/// Can't join on this condition, move it to other conditions
other_conditions.push_back(cond);
cond = nullptr;
other_conditions.push_back(condition);
condition = nullptr;
}
if (!can_convert_cross_to_inner)
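A hedged SQL sketch of the transformation this pass implements (the table and column names are hypothetical):

``` sql
-- Before: a CROSS JOIN with an equality condition in WHERE.
SELECT * FROM t1 CROSS JOIN t2 WHERE t1.id = t2.id;
-- After the pass, when a least supertype exists for both key expressions:
SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id;
```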

View File

@ -1,140 +0,0 @@
#include <Analyzer/Passes/CustomizeFunctionsPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
namespace DB
{
namespace
{
class CustomizeFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<CustomizeFunctionsVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<CustomizeFunctionsVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
const auto & settings = getSettings();
/// After successful function replacement function name and function name lowercase must be recalculated
auto function_name = function_node->getFunctionName();
auto function_name_lowercase = Poco::toLower(function_name);
if (function_node->isAggregateFunction() || function_node->isWindowFunction())
{
auto count_distinct_implementation_function_name = String(settings.count_distinct_implementation);
/// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
if (function_name_lowercase.ends_with("ifdistinct"))
{
size_t prefix_length = function_name_lowercase.size() - strlen("ifdistinct");
auto updated_function_name = function_name_lowercase.substr(0, prefix_length) + "DistinctIf";
resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name);
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
}
/** Move -OrNull suffix ahead, this should execute after add -OrNull suffix.
* Used to rewrite aggregate functions with -OrNull suffix in some cases.
* Example: sumIfOrNull.
* Result: sumOrNullIf.
*/
if (function_name.ends_with("OrNull"))
{
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(function_name);
if (function_properies && !function_properies->returns_default_when_only_null)
{
size_t function_name_size = function_name.size();
static constexpr std::array<std::string_view, 4> suffixes_to_replace = {"MergeState", "Merge", "State", "If"};
for (const auto & suffix : suffixes_to_replace)
{
auto suffix_string_value = String(suffix);
auto suffix_to_check = suffix_string_value + "OrNull";
if (!function_name.ends_with(suffix_to_check))
continue;
auto updated_function_name = function_name.substr(0, function_name_size - suffix_to_check.size()) + "OrNull" + suffix_string_value;
resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name);
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
break;
}
}
}
return;
}
if (settings.transform_null_in)
{
auto function_result_type = function_node->getResultType();
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
{{
{"in", "nullIn"},
{"notin", "notNullIn"},
{"globalin", "globalNullIn"},
{"globalnotin", "globalNotNullIn"},
}};
for (const auto & [in_function_name, in_function_name_to_replace] : in_function_to_replace_null_in_function_map)
{
if (function_name_lowercase == in_function_name)
{
resolveOrdinaryFunctionNode(*function_node, String(in_function_name_to_replace));
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
break;
}
}
}
}
static inline void resolveAggregateOrWindowFunctionNode(FunctionNode & function_node, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
function_aggregate_function->getArgumentTypes(),
function_aggregate_function->getParameters(),
properties);
if (function_node.isAggregateFunction())
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
else if (function_node.isWindowFunction())
function_node.resolveAsWindowFunction(std::move(aggregate_function));
}
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
}
void CustomizeFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
CustomizeFunctionsVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Customize aggregate functions and `in` functions implementations.
*
* Example: SELECT countDistinct();
* Result: SELECT countDistinctImplementation();
* Function countDistinctImplementation is taken from settings.count_distinct_implementation.
*/
class CustomizeFunctionsPass final : public IQueryTreePass
{
public:
String getName() override { return "CustomizeFunctions"; }
String getDescription() override { return "Customize implementation of aggregate functions, and in functions."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -46,8 +46,7 @@ public:
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1 &&
!getSettings().aggregate_functions_null_for_empty)
first_argument_constant_literal.get<UInt64>() == 1)
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();

View File

@ -37,14 +37,16 @@
#include <Storages/IStorage.h>
#include <Storages/StorageSet.h>
#include <Storages/StorageJoin.h>
#include <Interpreters/misc.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/StorageID.h>
#include <Interpreters/Context.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/Set.h>
#include <Interpreters/misc.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
@ -92,6 +94,7 @@ namespace ErrorCodes
extern const int CYCLIC_ALIASES;
extern const int INCORRECT_RESULT_OF_SCALAR_SUBQUERY;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
extern const int TYPE_MISMATCH;
extern const int AMBIGUOUS_IDENTIFIER;
@ -361,6 +364,12 @@ struct IdentifierResolveResult
}
};
struct IdentifierResolveState
{
IdentifierResolveResult resolve_result;
bool cyclic_identifier_resolve = false;
};
struct IdentifierResolveSettings
{
/// Allow to check join tree during identifier resolution
@ -687,7 +696,7 @@ struct IdentifierResolveScope
ContextPtr context;
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveResult, IdentifierLookupHash> identifier_lookup_to_result;
std::unordered_map<IdentifierLookup, IdentifierResolveState, IdentifierLookupHash> identifier_lookup_to_resolve_state;
/// Lambda argument can be expression like constant, column, or it can be function
std::unordered_map<std::string, QueryTreeNodePtr> expression_argument_name_to_node;
@ -799,11 +808,11 @@ struct IdentifierResolveScope
[[maybe_unused]] void dump(WriteBuffer & buffer) const
{
buffer << "Scope node " << scope_node->formatASTForErrorMessage() << '\n';
buffer << "Identifier lookup to result " << identifier_lookup_to_result.size() << '\n';
for (const auto & [identifier, result] : identifier_lookup_to_result)
buffer << "Identifier lookup to resolve state " << identifier_lookup_to_resolve_state.size() << '\n';
for (const auto & [identifier, state] : identifier_lookup_to_resolve_state)
{
buffer << "Identifier " << identifier.dump() << " resolve result ";
result.dump(buffer);
state.resolve_result.dump(buffer);
buffer << '\n';
}
@ -1156,15 +1165,17 @@ private:
static void replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope);
static void validateLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope);
static void convertLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope);
static void validateTableExpressionModifiers(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
/// Resolve identifier functions
@ -1963,26 +1974,10 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
auto constant_node = std::make_shared<ConstantNode>(std::move(constant_value), node);
if (constant_node->getValue().isNull())
{
std::string cast_type = constant_node->getResultType()->getName();
std::string cast_function_name = "_CAST";
auto cast_type_constant_value = std::make_shared<ConstantValue>(std::move(cast_type), std::make_shared<DataTypeString>());
auto cast_type_constant_node = std::make_shared<ConstantNode>(std::move(cast_type_constant_value));
auto cast_function_node = std::make_shared<FunctionNode>(cast_function_name);
cast_function_node->getArguments().getNodes().push_back(constant_node);
cast_function_node->getArguments().getNodes().push_back(std::move(cast_type_constant_node));
auto cast_function = FunctionFactory::instance().get(cast_function_name, context);
cast_function_node->resolveAsFunction(cast_function->build(cast_function_node->getArgumentColumns()));
node = std::move(cast_function_node);
}
node = buildCastFunction(constant_node, constant_node->getResultType(), context);
else
{
node = std::move(constant_node);
}
return;
}
@ -2091,7 +2086,7 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
}
}
void QueryAnalyzer::validateLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope)
void QueryAnalyzer::convertLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope)
{
const auto * limit_offset_constant_node = expression_node->as<ConstantNode>();
if (!limit_offset_constant_node || !isNativeNumber(removeNullable(limit_offset_constant_node->getResultType())))
@ -2101,11 +2096,17 @@ void QueryAnalyzer::validateLimitOffsetExpression(QueryTreeNodePtr & expression_
expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
Field converted = convertFieldToType(limit_offset_constant_node->getValue(), DataTypeUInt64());
if (converted.isNull())
Field converted_value = convertFieldToType(limit_offset_constant_node->getValue(), DataTypeUInt64());
if (converted_value.isNull())
throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION,
"{} numeric constant expression is not representable as UInt64",
expression_description);
auto constant_value = std::make_shared<ConstantValue>(std::move(converted_value), std::make_shared<DataTypeUInt64>());
auto result_constant_node = std::make_shared<ConstantNode>(std::move(constant_value));
result_constant_node->getSourceExpression() = limit_offset_constant_node->getSourceExpression();
expression_node = std::move(result_constant_node);
}
void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope)
@ -2226,9 +2227,72 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
for (auto & node : projection_list.getNodes())
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context)
{
std::string result_aggregate_function_name = aggregate_function_name;
auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name);
const auto & settings = context->getSettingsRef();
if (aggregate_function_name_lowercase == "countdistinct")
{
result_aggregate_function_name = settings.count_distinct_implementation;
}
else if (aggregate_function_name_lowercase == "countdistinctif" || aggregate_function_name_lowercase == "countifdistinct")
{
result_aggregate_function_name = settings.count_distinct_implementation;
result_aggregate_function_name += "If";
}
/// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
if (result_aggregate_function_name.ends_with("ifdistinct"))
{
size_t prefix_length = result_aggregate_function_name.size() - strlen("ifdistinct");
result_aggregate_function_name = result_aggregate_function_name.substr(0, prefix_length) + "DistinctIf";
}
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null)
{
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
if (properties && !properties->returns_default_when_only_null)
result_aggregate_function_name += "OrNull";
}
/** Move -OrNull suffix ahead, this should execute after add -OrNull suffix.
* Used to rewrite aggregate functions with -OrNull suffix in some cases.
* Example: sumIfOrNull.
* Result: sumOrNullIf.
*/
if (result_aggregate_function_name.ends_with("OrNull"))
{
auto function_properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
if (function_properties && !function_properties->returns_default_when_only_null)
{
size_t function_name_size = result_aggregate_function_name.size();
static constexpr std::array<std::string_view, 4> suffixes_to_replace = {"MergeState", "Merge", "State", "If"};
for (const auto & suffix : suffixes_to_replace)
{
auto suffix_string_value = String(suffix);
auto suffix_to_check = suffix_string_value + "OrNull";
if (!result_aggregate_function_name.ends_with(suffix_to_check))
continue;
result_aggregate_function_name = result_aggregate_function_name.substr(0, function_name_size - suffix_to_check.size());
result_aggregate_function_name += "OrNull";
result_aggregate_function_name += suffix_string_value;
break;
}
}
}
return result_aggregate_function_name;
}
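A hedged SQL sketch of the renames handled above, assuming the default `count_distinct_implementation = uniqExact` (the table and column names are hypothetical):

``` sql
SELECT countDistinct(x) FROM t;      -- resolved as uniqExact(x)
SELECT countIfDistinct(x, c) FROM t; -- resolved as uniqExactIf(x, c)
SELECT sumIfOrNull(x, c) FROM t;     -- the -OrNull suffix is moved ahead: sumOrNullIf(x, c)
```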
/// Resolve identifier functions implementation
@ -2256,6 +2320,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
StorageID storage_id(database_name, table_name);
storage_id = context->resolveStorageID(storage_id);
bool is_temporary_table = storage_id.getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE;
auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
if (!storage)
return {};
@ -2263,7 +2329,11 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
return std::make_shared<TableNode>(std::move(storage), std::move(storage_lock), std::move(storage_snapshot));
auto result = std::make_shared<TableNode>(std::move(storage), std::move(storage_lock), std::move(storage_snapshot));
if (is_temporary_table)
result->setTemporaryTableName(table_name);
return result;
}
/// Resolve identifier from compound expression
@ -2991,7 +3061,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
{
resolved_identifier = resolved_identifier->clone();
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
resolved_column.setColumnType(makeNullableSafe(resolved_column.getColumnType()));
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
}
return resolved_identifier;
@ -3310,21 +3380,28 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings)
{
auto it = scope.identifier_lookup_to_result.find(identifier_lookup);
if (it != scope.identifier_lookup_to_result.end())
auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup);
if (it != scope.identifier_lookup_to_resolve_state.end())
{
if (!it->second.resolved_identifier)
if (it->second.cyclic_identifier_resolve)
throw Exception(ErrorCodes::CYCLIC_ALIASES,
"Cyclic aliases for identifier '{}'. In scope {}",
identifier_lookup.identifier.getFullName(),
scope.scope_node->formatASTForErrorMessage());
if (scope.use_identifier_lookup_to_result_cache && !scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
return it->second;
}
if (!it->second.resolve_result.isResolved())
it->second.cyclic_identifier_resolve = true;
auto [insert_it, _] = scope.identifier_lookup_to_result.insert({identifier_lookup, IdentifierResolveResult()});
it = insert_it;
if (it->second.resolve_result.isResolved() &&
scope.use_identifier_lookup_to_result_cache &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
return it->second.resolve_result;
}
else
{
auto [insert_it, _] = scope.identifier_lookup_to_resolve_state.insert({identifier_lookup, IdentifierResolveState()});
it = insert_it;
}
/// Resolve identifier from current scope
@ -3403,15 +3480,18 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
resolve_result.resolve_place = IdentifierResolvePlace::DATABASE_CATALOG;
}
it->second = resolve_result;
bool was_cyclic_identifier_resolve = it->second.cyclic_identifier_resolve;
if (!was_cyclic_identifier_resolve)
it->second.resolve_result = resolve_result;
it->second.cyclic_identifier_resolve = false;
/** If identifier was not resolved, or during expression resolution identifier was explicitly added into non cached set,
* or identifier caching was disabled in resolve scope we remove identifier lookup result from identifier lookup to result table.
*/
if (!resolve_result.resolved_identifier ||
if (!was_cyclic_identifier_resolve && (!resolve_result.resolved_identifier ||
scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) ||
!scope.use_identifier_lookup_to_result_cache)
scope.identifier_lookup_to_result.erase(it);
!scope.use_identifier_lookup_to_result_cache))
scope.identifier_lookup_to_resolve_state.erase(it);
return resolve_result;
}
@ -3625,7 +3705,7 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
if (!table_expression_node)
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Qualified matcher {} does not find table. In scope {}",
matcher_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
@ -3874,17 +3954,16 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope);
std::unordered_map<const IColumnTransformerNode *, std::unordered_set<std::string>> strict_transformer_to_used_column_names;
auto add_strict_transformer_column_name = [&](const IColumnTransformerNode * transformer, const std::string & column_name)
for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes())
{
auto it = strict_transformer_to_used_column_names.find(transformer);
if (it == strict_transformer_to_used_column_names.end())
{
auto [inserted_it, _] = strict_transformer_to_used_column_names.emplace(transformer, std::unordered_set<std::string>());
it = inserted_it;
}
auto * except_transformer = transformer->as<ExceptColumnTransformerNode>();
auto * replace_transformer = transformer->as<ReplaceColumnTransformerNode>();
it->second.insert(column_name);
};
if (except_transformer && except_transformer->isStrict())
strict_transformer_to_used_column_names.emplace(except_transformer, std::unordered_set<std::string>());
else if (replace_transformer && replace_transformer->isStrict())
strict_transformer_to_used_column_names.emplace(replace_transformer, std::unordered_set<std::string>());
}
ListNodePtr list = std::make_shared<ListNode>();
ProjectionNames result_projection_names;
@ -3939,12 +4018,12 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
else if (auto * except_transformer = transformer->as<ExceptColumnTransformerNode>())
{
if (apply_transformer_was_used || replace_transformer_was_used)
break;
continue;
if (except_transformer->isColumnMatching(column_name))
{
if (except_transformer->isStrict())
add_strict_transformer_column_name(except_transformer, column_name);
strict_transformer_to_used_column_names[except_transformer].insert(column_name);
node = {};
break;
@ -3953,7 +4032,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
else if (auto * replace_transformer = transformer->as<ReplaceColumnTransformerNode>())
{
if (apply_transformer_was_used || replace_transformer_was_used)
break;
continue;
replace_transformer_was_used = true;
@ -3962,10 +4041,19 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
continue;
if (replace_transformer->isStrict())
add_strict_transformer_column_name(replace_transformer, column_name);
strict_transformer_to_used_column_names[replace_transformer].insert(column_name);
node = replace_expression->clone();
node_projection_names = resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
/** If replace expression resolved as single node, we want to use replace column name as result projection name, instead
* of using replace expression projection name.
*
* Example: SELECT * REPLACE id + 5 AS id FROM test_table;
*/
if (node_projection_names.size() == 1)
node_projection_names[0] = column_name;
execute_replace_transformer = true;
}
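The added branch keeps the matched column's name as the projection name whenever the REPLACE expression resolves to a single node, so SELECT * REPLACE (id + 5) AS id still reports the column as "id" rather than "plus(id, 5)". Reduced to its essence:

#include <string>
#include <vector>

void fixReplaceProjectionName(std::vector<std::string> & node_projection_names, const std::string & column_name)
{
    /// Replacement resolved to a single node: reuse the replaced column's name.
    if (node_projection_names.size() == 1)
        node_projection_names[0] = column_name;
}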
@ -4050,22 +4138,12 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
non_matched_column_names.push_back(column_name);
}
- WriteBufferFromOwnString non_matched_column_names_buffer;
- size_t non_matched_column_names_size = non_matched_column_names.size();
- for (size_t i = 0; i < non_matched_column_names_size; ++i)
- {
-     const auto & column_name = non_matched_column_names[i];
-     non_matched_column_names_buffer << column_name;
-     if (i + 1 != non_matched_column_names_size)
-         non_matched_column_names_buffer << ", ";
- }
  throw Exception(ErrorCodes::BAD_ARGUMENTS,
-     "Strict {} column transformer {} expects following column(s) {}",
+     "Strict {} column transformer {} expects following column(s) : {}. In scope {}",
      toString(strict_transformer_type),
      strict_transformer->formatASTForErrorMessage(),
-     non_matched_column_names_buffer.str());
+     fmt::join(non_matched_column_names, ", "),
+     scope.scope_node->formatASTForErrorMessage());
}
matcher_node = std::move(list);
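The rewritten diagnostic drops the hand-rolled WriteBuffer loop in favour of fmt::join. For reference, the same facility stand-alone (fmt::join lives in fmt/ranges.h in current {fmt} releases):

#include <string>
#include <vector>
#include <fmt/format.h>
#include <fmt/ranges.h>

int main()
{
    std::vector<std::string> non_matched_column_names = {"id", "value"};
    /// Prints: expects following column(s) : id, value
    fmt::print("expects following column(s) : {}\n", fmt::join(non_matched_column_names, ", "));
}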
@ -4359,17 +4437,26 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
bool is_special_function_in = false;
- bool is_special_function_dict_get_or_join_get = false;
+ bool is_special_function_dict_get = false;
+ bool is_special_function_join_get = false;
bool is_special_function_exists = false;
if (!lambda_expression_untyped)
{
is_special_function_in = isNameOfInFunction(function_name);
-     is_special_function_dict_get_or_join_get = functionIsJoinGet(function_name) || functionIsDictGet(function_name);
+     is_special_function_dict_get = functionIsDictGet(function_name);
+     is_special_function_join_get = functionIsJoinGet(function_name);
is_special_function_exists = function_name == "exists";
-     /// Handle SELECT count(*) FROM test_table
-     if (Poco::toLower(function_name) == "count" && function_node_ptr->getArguments().getNodes().size() == 1)
+     auto function_name_lowercase = Poco::toLower(function_name);
+     /** Special handling for count and countState functions.
+       *
+       * Example: SELECT count(*) FROM test_table
+       * Example: SELECT countState(*) FROM test_table;
+       */
+     if (function_node_ptr->getArguments().getNodes().size() == 1 &&
+         (function_name_lowercase == "count" || function_name_lowercase == "countstate"))
{
auto * matcher_node = function_node_ptr->getArguments().getNodes().front()->as<MatcherNode>();
if (matcher_node && matcher_node->isUnqualified())
@ -4386,29 +4473,58 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
* Otherwise replace identifier with identifier full name constant.
* Validation that dictionary exists or table exists will be performed during function `getReturnType` method call.
*/
- if (is_special_function_dict_get_or_join_get &&
+ if ((is_special_function_dict_get || is_special_function_join_get) &&
!function_node_ptr->getArguments().getNodes().empty() &&
function_node_ptr->getArguments().getNodes()[0]->getNodeType() == QueryTreeNodeType::IDENTIFIER)
{
auto & first_argument = function_node_ptr->getArguments().getNodes()[0];
-     auto & identifier_node = first_argument->as<IdentifierNode &>();
-     IdentifierLookup identifier_lookup{identifier_node.getIdentifier(), IdentifierLookupContext::EXPRESSION};
+     auto & first_argument_identifier = first_argument->as<IdentifierNode &>();
+     auto identifier = first_argument_identifier.getIdentifier();
+     IdentifierLookup identifier_lookup{identifier, IdentifierLookupContext::EXPRESSION};
auto resolve_result = tryResolveIdentifier(identifier_lookup, scope);
if (resolve_result.isResolved())
{
first_argument = std::move(resolve_result.resolved_identifier);
}
else
-         first_argument = std::make_shared<ConstantNode>(identifier_node.getIdentifier().getFullName());
{
size_t parts_size = identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected {} function first argument identifier to contain 1 or 2 parts. Actual '{}'. In scope {}",
function_name,
identifier.getFullName(),
scope.scope_node->formatASTForErrorMessage());
if (is_special_function_dict_get)
{
scope.context->getExternalDictionariesLoader().assertDictionaryStructureExists(identifier.getFullName(), scope.context);
}
else
{
auto table_node = tryResolveTableIdentifierFromDatabaseCatalog(identifier, scope.context);
if (!table_node)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} first argument expected table identifier '{}'. In scope {}",
function_name,
identifier.getFullName(),
scope.scope_node->formatASTForErrorMessage());
auto & table_node_typed = table_node->as<TableNode &>();
if (!std::dynamic_pointer_cast<StorageJoin>(table_node_typed.getStorage()))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} table '{}' should have engine StorageJoin. In scope {}",
function_name,
identifier.getFullName(),
scope.scope_node->formatASTForErrorMessage());
}
first_argument = std::make_shared<ConstantNode>(identifier.getFullName());
}
}
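The rewritten branch validates the first dictGet/joinGet argument eagerly: dictGet checks the dictionary structure through the external dictionaries loader, while joinGet resolves the table and insists on a StorageJoin engine. The identifier shape rule alone, as a sketch with standard types:

#include <stdexcept>
#include <string>
#include <vector>

void checkDictOrJoinIdentifier(const std::vector<std::string> & parts, const std::string & function_name)
{
    /// A dictionary or join table identifier is either "name" or "database.name".
    if (parts.empty() || parts.size() > 2)
        throw std::invalid_argument(
            "Expected " + function_name + " function first argument identifier to contain 1 or 2 parts");
}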
+ /// Resolve function arguments
+ bool allow_table_expressions = is_special_function_in || is_special_function_exists;
+ auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(),
+     scope,
+     true /*allow_lambda_expression*/,
+     allow_table_expressions /*allow_table_expression*/);
if (is_special_function_exists)
{
/// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1).
@ -4417,25 +4533,51 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto constant_data_type = std::make_shared<DataTypeUInt64>();
auto in_subquery = std::make_shared<QueryNode>(Context::createCopy(scope.context));
+     in_subquery->setIsSubquery(true);
in_subquery->getProjection().getNodes().push_back(std::make_shared<ConstantNode>(1UL, constant_data_type));
in_subquery->getJoinTree() = exists_subquery_argument;
in_subquery->getLimit() = std::make_shared<ConstantNode>(1UL, constant_data_type);
in_subquery->resolveProjectionColumns({NameAndTypePair("1", constant_data_type)});
-     in_subquery->setIsSubquery(true);
function_node_ptr = std::make_shared<FunctionNode>("in");
function_node_ptr->getArguments().getNodes() = {std::make_shared<ConstantNode>(1UL, constant_data_type), in_subquery};
node = function_node_ptr;
function_name = "in";
is_special_function_in = true;
}
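The rewrite turns EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1), so EXISTS reuses the IN machinery, and the LIMIT 1 lets execution stop after the first produced row. The equivalence it relies on, stated as code:

/// Both forms reduce to "did the subquery yield at least one row".
bool existsViaIn(bool subquery_produced_a_row)
{
    return subquery_produced_a_row;
}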
- /// Resolve function arguments
- bool allow_table_expressions = is_special_function_in;
- auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(),
-     scope,
-     true /*allow_lambda_expression*/,
-     allow_table_expressions /*allow_table_expression*/);
auto & function_node = *function_node_ptr;
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
if (is_special_function_in)
{
if (scope.context->getSettingsRef().transform_null_in)
{
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
{{
{"in", "nullIn"},
{"notIn", "notNullIn"},
{"globalIn", "globalNullIn"},
{"globalNotIn", "globalNotNullIn"},
}};
for (const auto & [in_function_name, in_function_name_to_replace] : in_function_to_replace_null_in_function_map)
{
if (function_name == in_function_name)
{
function_name = in_function_name_to_replace;
break;
}
}
}
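The transform_null_in substitution uses a constexpr lookup table of (name, replacement) pairs instead of chained conditionals. The same pattern in isolation:

#include <array>
#include <string>
#include <string_view>
#include <utility>

std::string rewriteInFunctionName(const std::string & function_name)
{
    static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> map = {{
        {"in", "nullIn"},
        {"notIn", "notNullIn"},
        {"globalIn", "globalNullIn"},
        {"globalNotIn", "globalNotNullIn"},
    }};

    for (const auto & [from, to] : map)
        if (function_name == from)
            return std::string(to);

    return function_name;  /// not an IN-family name: leave unchanged
}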
auto & function_in_arguments_nodes = function_node.getArguments().getNodes();
if (function_in_arguments_nodes.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function '{}' expects 2 arguments", function_name);
@ -4496,6 +4638,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto & function_argument = function_arguments[function_argument_index];
ColumnWithTypeAndName argument_column;
argument_column.name = arguments_projection_names[function_argument_index];
/** If function argument is lambda, save lambda argument index and initialize argument type as DataTypeFunction
* where function argument types are initialized with empty array of lambda arguments size.
@ -4537,7 +4680,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
/// Calculate function projection name
- ProjectionNames result_projection_names = {calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names)};
+ ProjectionNames result_projection_names = { calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names) };
/** Try to resolve function as
* 1. Lambda function in current scope. Example: WITH (x -> x + 1) AS lambda SELECT lambda(1);
@ -4656,8 +4799,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
}
- const auto & settings = scope.context->getSettingsRef();
if (function_node.isWindowFunction())
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
@ -4672,12 +4813,10 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments",
function_name);
-     bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull");
+     std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
      AggregateFunctionProperties properties;
-     auto aggregate_function = need_add_or_null
-         ? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties)
-         : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
+     auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
function_node.resolveAsWindowFunction(std::move(aggregate_function));
@ -4736,24 +4875,10 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments",
function_name);
-     auto function_name_lowercase = Poco::toLower(function_name);
-     if (function_name_lowercase == "countdistinct")
-     {
-         function_name = scope.context->getSettingsRef().count_distinct_implementation;
-     }
-     else if (function_name_lowercase == "countdistinctif" || function_name_lowercase == "countifdistinct")
-     {
-         function_name = scope.context->getSettingsRef().count_distinct_implementation;
-         function_name += "If";
-     }
-     bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull");
-     if (need_add_or_null)
-         function_name += "OrNull";
+     std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
      AggregateFunctionProperties properties;
-     auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
+     auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
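Both the window-function and the aggregate-function paths now delegate to rewriteAggregateFunctionNameIfNeeded instead of duplicating the countDistinct and -OrNull handling inline. A hedged reconstruction from the removed lines (the real helper takes the scope context; this free function only illustrates the flow):

#include <cctype>
#include <string>

std::string rewriteAggregateFunctionNameSketch(
    std::string function_name,
    const std::string & count_distinct_implementation,  /// settings.count_distinct_implementation
    bool aggregate_functions_null_for_empty)            /// settings.aggregate_functions_null_for_empty
{
    std::string lowercase = function_name;
    for (auto & c : lowercase)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

    if (lowercase == "countdistinct")
        function_name = count_distinct_implementation;
    else if (lowercase == "countdistinctif" || lowercase == "countifdistinct")
        function_name = count_distinct_implementation + "If";

    if (aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"))
        function_name += "OrNull";

    return function_name;
}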
@ -4867,7 +4992,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
/// Recalculate function projection name after lambda resolution
-     result_projection_names = {calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names)};
+     result_projection_names = { calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names) };
}
/** Create SET column for special function IN to allow constant folding
@ -4906,11 +5031,15 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
{
auto function_base = function->build(argument_columns);
/// Do not constant fold get scalar functions
bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" ||
function_name == "shardCount";
/** If function is suitable for constant folding try to convert it to constant.
* Example: SELECT plus(1, 1);
* Result: SELECT 2;
*/
- if (function_base->isSuitableForConstantFolding())
+ if (function_base->isSuitableForConstantFolding() && !disable_constant_folding)
{
auto result_type = function_base->getResultType();
auto executable_function = function_base->prepare(argument_columns);
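__getScalar, shardNum and shardCount are excluded from constant folding: presumably their value depends on where the query fragment eventually runs, so folding at analysis time on the initiator would freeze the wrong result. The gate reduces to:

#include <string>

bool mayConstantFold(const std::string & function_name, bool suitable_for_constant_folding)
{
    const bool disable_constant_folding = function_name == "__getScalar"
        || function_name == "shardNum"
        || function_name == "shardCount";
    return suitable_for_constant_folding && !disable_constant_folding;
}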
@ -4981,6 +5110,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
*/
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression)
{
checkStackSize();
auto resolved_expression_it = resolved_expressions.find(node);
if (resolved_expression_it != resolved_expressions.end())
{
@ -5687,10 +5818,19 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
if (table_node)
{
-     const auto & table_storage_id = table_node->getStorageID();
-     table_expression_data.database_name = table_storage_id.database_name;
-     table_expression_data.table_name = table_storage_id.table_name;
-     table_expression_data.table_expression_name = table_storage_id.getFullNameNotQuoted();
if (!table_node->getTemporaryTableName().empty())
{
table_expression_data.table_name = table_node->getTemporaryTableName();
table_expression_data.table_expression_name = table_node->getTemporaryTableName();
}
else
{
const auto & table_storage_id = table_node->getStorageID();
table_expression_data.database_name = table_storage_id.database_name;
table_expression_data.table_name = table_storage_id.table_name;
table_expression_data.table_expression_name = table_storage_id.getFullNameNotQuoted();
}
table_expression_data.table_expression_description = "table";
}
else if (query_node || union_node)
@ -5736,7 +5876,9 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
{
-         auto column_node = std::make_shared<ColumnNode>(column_name_and_type, buildQueryTree(column_default->expression, scope.context), table_expression_node);
+         auto alias_expression = buildQueryTree(column_default->expression, scope.context);
+         alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, scope.context, false /*resolve*/);
+         auto column_node = std::make_shared<ColumnNode>(column_name_and_type, std::move(alias_expression), table_expression_node);
column_name_to_column_node.emplace(column_name_and_type.name, column_node);
alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node);
}
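ALIAS column expressions are now wrapped in an unresolved cast to the declared column type via the buildCastFunction helper added further down in this diff, so the alias keeps its declared type even when the expression resolves to a different one. A minimal analogue with a hypothetical node type:

#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
    std::vector<std::shared_ptr<Node>> children;
};

/// Build _CAST(expression, 'TypeName'), the same shape buildCastFunction produces.
std::shared_ptr<Node> wrapInCast(std::shared_ptr<Node> expression, const std::string & type_name)
{
    auto cast = std::make_shared<Node>(Node{"_CAST", {}});
    cast->children.push_back(std::move(expression));
    cast->children.push_back(std::make_shared<Node>(Node{"'" + type_name + "'", {}}));
    return cast;
}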
@ -6103,13 +6245,6 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
IdentifierLookup identifier_lookup{identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION};
auto result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
- if (!result_left_table_expression && identifier_node->hasAlias())
- {
-     std::vector<std::string> alias_identifier_parts = {identifier_node->getAlias()};
-     IdentifierLookup alias_identifier_lookup{Identifier(std::move(alias_identifier_parts)), IdentifierLookupContext::EXPRESSION};
-     result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(alias_identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
- }
if (!result_left_table_expression)
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"JOIN {} using identifier '{}' cannot be resolved from left table expression. In scope {}",
@ -6125,13 +6260,6 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
scope.scope_node->formatASTForErrorMessage());
auto result_right_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getRightTableExpression(), scope);
- if (!result_right_table_expression && identifier_node->hasAlias())
- {
-     std::vector<std::string> alias_identifier_parts = {identifier_node->getAlias()};
-     IdentifierLookup alias_identifier_lookup{Identifier(std::move(alias_identifier_parts)), IdentifierLookupContext::EXPRESSION};
-     result_right_table_expression = tryResolveIdentifierFromJoinTreeNode(alias_identifier_lookup, join_node_typed.getRightTableExpression(), scope);
- }
if (!result_right_table_expression)
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"JOIN {} using identifier '{}' cannot be resolved from right table expression. In scope {}",
@ -6223,6 +6351,21 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
{
validateTableExpressionModifiers(join_tree_node, scope);
initializeTableExpressionData(join_tree_node, scope);
auto & query_node = scope.scope_node->as<QueryNode &>();
auto & mutable_context = query_node.getMutableContext();
if (!mutable_context->isDistributed())
{
bool is_distributed = false;
if (auto * table_node = join_tree_node->as<TableNode>())
is_distributed = table_node->getStorage()->isRemote();
else if (auto * table_function_node = join_tree_node->as<TableFunctionNode>())
is_distributed = table_function_node->getStorage()->isRemote();
mutable_context->setDistributed(is_distributed);
}
}
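The added block marks the mutable query context as distributed as soon as any table or table function in the join tree is backed by remote storage, and the flag is sticky: once set it is never cleared. The accumulation, sketched with a stand-in storage interface:

#include <memory>
#include <vector>

struct Storage
{
    bool remote = false;
    bool isRemote() const { return remote; }
};

bool computeIsDistributed(const std::vector<std::shared_ptr<Storage>> & join_tree_storages)
{
    for (const auto & storage : join_tree_storages)
        if (storage && storage->isRemote())
            return true;  /// one remote table makes the whole query distributed
    return false;
}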
auto add_table_expression_alias_into_scope = [&](const QueryTreeNodePtr & table_expression_node)
@ -6473,13 +6616,13 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasLimitByLimit())
{
resolveExpressionNode(query_node_typed.getLimitByLimit(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
-     validateLimitOffsetExpression(query_node_typed.getLimitByLimit(), "LIMIT BY LIMIT", scope);
+     convertLimitOffsetExpression(query_node_typed.getLimitByLimit(), "LIMIT BY LIMIT", scope);
}
if (query_node_typed.hasLimitByOffset())
{
resolveExpressionNode(query_node_typed.getLimitByOffset(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
-     validateLimitOffsetExpression(query_node_typed.getLimitByOffset(), "LIMIT BY OFFSET", scope);
+     convertLimitOffsetExpression(query_node_typed.getLimitByOffset(), "LIMIT BY OFFSET", scope);
}
if (query_node_typed.hasLimitBy())
@ -6493,13 +6636,13 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasLimit())
{
resolveExpressionNode(query_node_typed.getLimit(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
-     validateLimitOffsetExpression(query_node_typed.getLimit(), "LIMIT", scope);
+     convertLimitOffsetExpression(query_node_typed.getLimit(), "LIMIT", scope);
}
if (query_node_typed.hasOffset())
{
resolveExpressionNode(query_node_typed.getOffset(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
-     validateLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope);
+     convertLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope);
}
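Every LIMIT/OFFSET call site switches from validateLimitOffsetExpression to convertLimitOffsetExpression; the new helper is not shown in this hunk, but the rename suggests the constant is now coerced rather than merely checked. A hedged guess at the conversion step, with standard types only:

#include <cstdint>
#include <stdexcept>

uint64_t convertLimitOffsetValue(long long folded_constant)
{
    /// Assumption: negative LIMIT/OFFSET constants are rejected, the rest become UInt64.
    if (folded_constant < 0)
        throw std::invalid_argument("LIMIT/OFFSET expression must be non-negative");
    return static_cast<uint64_t>(folded_constant);
}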
/** Resolve nodes with duplicate aliases.

View File

@ -54,6 +54,7 @@ namespace DB
* Replace `countDistinct` and `countIfDistinct` aggregate functions using setting count_distinct_implementation.
* Add -OrNull suffix to aggregate functions if setting aggregate_functions_null_for_empty is true.
* Function `exists` is converted into `in`.
* Functions `in`, `notIn`, `globalIn`, `globalNotIn` are converted into `nullIn`, `notNullIn`, `globalNullIn`, `globalNotNullIn` if setting transform_null_in is true.
*
* For function `grouping` arguments are resolved, but it is planner responsibility to initialize it with concrete grouping function
* based on group by kind and group by keys positions.

View File

@ -17,7 +17,6 @@
namespace DB
{
namespace
{
@ -90,9 +89,14 @@ private:
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();
std::string suffix = "If";
if (result_type->isNullable())
suffix = "OrNullIf";
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
-         function_node.getFunctionName() + (result_type->isNullable() ? "IfOrNull" : "If"),
+         function_node.getFunctionName() + suffix,
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);

View File

@ -0,0 +1,67 @@
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Storages/IStorage.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
namespace DB
{
namespace
{
class ShardNumColumnToFunctionVisitor : public InDepthQueryTreeVisitorWithContext<ShardNumColumnToFunctionVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<ShardNumColumnToFunctionVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;
const auto & column = column_node->getColumn();
if (column.name != "_shard_num")
return;
auto column_source = column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
auto * table_function_node = column_source->as<TableFunctionNode>();
if (!table_node && !table_function_node)
return;
const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
if (!storage->isRemote())
return;
const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
if (!storage->isVirtualColumn(column.name, storage_snapshot->getMetadataForQuery()))
return;
auto function_node = std::make_shared<FunctionNode>("shardNum");
auto function = FunctionFactory::instance().get(function_node->getFunctionName(), getContext());
function_node->resolveAsFunction(function->build(function_node->getArgumentColumns()));
node = std::move(function_node);
}
};
}
void ShardNumColumnToFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
ShardNumColumnToFunctionVisitor visitor(context);
visitor.visit(query_tree_node);
}
}
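The pass is wired up through the query tree pass manager, which this diff also updates. Registration sketch mirroring the addQueryTreePasses() change below:

#include <memory>
#include <Analyzer/QueryTreePassManager.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>

void registerShardNumPass(DB::QueryTreePassManager & manager)
{
    manager.addPass(std::make_unique<DB::ShardNumColumnToFunctionPass>());
}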

View File

@ -0,0 +1,21 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Rewrite _shard_num column into shardNum() function.
*/
class ShardNumColumnToFunctionPass final : public IQueryTreePass
{
public:
String getName() override { return "ShardNumColumnToFunctionPass"; }
String getDescription() override { return "Rewrite _shard_num column into shardNum() function"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -26,7 +26,6 @@
#include <Analyzer/Passes/IfChainToMultiIfPass.h>
#include <Analyzer/Passes/OrderByTupleEliminationPass.h>
#include <Analyzer/Passes/NormalizeCountVariantsPass.h>
- #include <Analyzer/Passes/CustomizeFunctionsPass.h>
#include <Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h>
#include <Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h>
#include <Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h>
@ -40,6 +39,7 @@
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
namespace DB
@ -147,7 +147,6 @@ private:
/** ClickHouse query tree pass manager.
*
- * TODO: Support _shard_num into shardNum() rewriting.
* TODO: Support logical expressions optimizer.
* TODO: Support setting convert_query_to_cnf.
* TODO: Support setting optimize_using_constraints.
@ -242,8 +241,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());
- manager.addPass(std::make_unique<CustomizeFunctionsPass>());
manager.addPass(std::make_unique<AggregateFunctionsArithmericOperationsPass>());
manager.addPass(std::make_unique<UniqInjectiveFunctionsEliminationPass>());
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());
@ -266,10 +263,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
}
}

View File

@ -82,14 +82,7 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
if (storage && rhs_typed.storage)
return storage_id == rhs_typed.storage_id;
- if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
-     return false;
- else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
-     return false;
- else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
-     return false;
- return true;
+ return table_expression_modifiers == rhs_typed.table_expression_modifiers;
}
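The collapsed comparison is safe because std::optional's operator== already encodes every empty/value combination the removed branches spelled out:

#include <optional>

static_assert(std::optional<int>{}  == std::optional<int>{});   /// both empty: equal
static_assert(std::optional<int>{1} != std::optional<int>{});   /// value vs empty: not equal
static_assert(std::optional<int>{1} == std::optional<int>{1});  /// same value: equal
static_assert(std::optional<int>{1} != std::optional<int>{2});  /// different values: not equal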
void TableFunctionNode::updateTreeHashImpl(HashState & state) const

View File

@ -42,6 +42,9 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << ", table_name: " << storage_id.getFullNameNotQuoted();
if (!temporary_table_name.empty())
buffer << ", temporary_table_name: " << temporary_table_name;
if (table_expression_modifiers)
{
buffer << ", ";
@ -52,15 +55,8 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
bool TableNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const TableNode &>(rhs);
- if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
-     return false;
- else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
-     return false;
- else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
-     return false;
- return storage_id == rhs_typed.storage_id;
+ return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers &&
+     temporary_table_name == rhs_typed.temporary_table_name;
}
void TableNode::updateTreeHashImpl(HashState & state) const
@ -69,6 +65,9 @@ void TableNode::updateTreeHashImpl(HashState & state) const
state.update(full_name.size());
state.update(full_name);
state.update(temporary_table_name.size());
state.update(temporary_table_name);
if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);
}
@ -77,12 +76,16 @@ QueryTreeNodePtr TableNode::cloneImpl() const
{
auto result_table_node = std::make_shared<TableNode>(storage, storage_id, storage_lock, storage_snapshot);
result_table_node->table_expression_modifiers = table_expression_modifiers;
result_table_node->temporary_table_name = temporary_table_name;
return result_table_node;
}
ASTPtr TableNode::toASTImpl() const
{
if (!temporary_table_name.empty())
return std::make_shared<ASTTableIdentifier>(temporary_table_name);
return std::make_shared<ASTTableIdentifier>(storage_id.getDatabaseName(), storage_id.getTableName());
}

View File

@ -56,6 +56,18 @@ public:
return storage_lock;
}
/// Get temporary table name
const std::string & getTemporaryTableName() const
{
return temporary_table_name;
}
/// Set temporary table name
void setTemporaryTableName(std::string temporary_table_name_value)
{
temporary_table_name = std::move(temporary_table_name_value);
}
/// Return true if table node has table expression modifiers, false otherwise
bool hasTableExpressionModifiers() const
{
@ -102,6 +114,7 @@ private:
TableLockHolder storage_lock;
StorageSnapshotPtr storage_snapshot;
std::optional<TableExpressionModifiers> table_expression_modifiers;
std::string temporary_table_name;
static constexpr size_t children_size = 0;
};

View File

@ -5,13 +5,18 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTFunction.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/JoinNode.h>
@ -74,6 +79,29 @@ bool isNameOfInFunction(const std::string & function_name)
return is_special_function_in;
}
QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression,
const DataTypePtr & type,
const ContextPtr & context,
bool resolve)
{
std::string cast_type = type->getName();
auto cast_type_constant_value = std::make_shared<ConstantValue>(std::move(cast_type), std::make_shared<DataTypeString>());
auto cast_type_constant_node = std::make_shared<ConstantNode>(std::move(cast_type_constant_value));
std::string cast_function_name = "_CAST";
auto cast_function_node = std::make_shared<FunctionNode>(cast_function_name);
cast_function_node->getArguments().getNodes().push_back(expression);
cast_function_node->getArguments().getNodes().push_back(std::move(cast_type_constant_node));
if (resolve)
{
auto cast_function = FunctionFactory::instance().get(cast_function_name, context);
cast_function_node->resolveAsFunction(cast_function->build(cast_function_node->getArgumentColumns()));
}
return cast_function_node;
}
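A usage sketch of the new helper, matching the ALIAS-column call earlier in this diff (expression and context are assumed to be at hand):

/// Unresolved cast, resolved later together with the rest of the tree:
///     auto alias_expression = buildCastFunction(expression, column_type, context, false /*resolve*/);
/// Eagerly resolved cast:
///     auto casted = buildCastFunction(expression, std::make_shared<DataTypeUInt64>(), context, true /*resolve*/);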
static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expression_node)
{
ASTPtr table_expression_node_ast;

Some files were not shown because too many files have changed in this diff.