mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-05 22:12:21 +00:00
Merge branch 'master' into empty-parts-fix
This commit is contained in:
commit
b9e229d19d
@ -516,9 +516,9 @@ include (cmake/find/fast_float.cmake)
|
||||
include (cmake/find/rapidjson.cmake)
|
||||
include (cmake/find/fastops.cmake)
|
||||
include (cmake/find/odbc.cmake)
|
||||
include (cmake/find/nanodbc.cmake)
|
||||
include (cmake/find/rocksdb.cmake)
|
||||
include (cmake/find/libpqxx.cmake)
|
||||
include (cmake/find/nanodbc.cmake)
|
||||
include (cmake/find/nuraft.cmake)
|
||||
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
#endif
|
||||
@ -1266,7 +1266,7 @@ public:
|
||||
};
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
@ -159,9 +159,9 @@ public:
|
||||
*/
|
||||
Pool(const std::string & db_,
|
||||
const std::string & server_,
|
||||
const std::string & user_ = "",
|
||||
const std::string & password_ = "",
|
||||
unsigned port_ = 0,
|
||||
const std::string & user_,
|
||||
const std::string & password_,
|
||||
unsigned port_,
|
||||
const std::string & socket_ = "",
|
||||
unsigned connect_timeout_ = MYSQLXX_DEFAULT_TIMEOUT,
|
||||
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
|
||||
|
@ -1,9 +1,9 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
SET(VERSION_REVISION 54450)
|
||||
SET(VERSION_REVISION 54451)
|
||||
SET(VERSION_MAJOR 21)
|
||||
SET(VERSION_MINOR 5)
|
||||
SET(VERSION_MINOR 6)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
|
||||
SET(VERSION_DESCRIBE v21.5.1.1-prestable)
|
||||
SET(VERSION_STRING 21.5.1.1)
|
||||
SET(VERSION_GITHASH 96fced4c3cf432fb0b401d2ab01f0c56e5f74a96)
|
||||
SET(VERSION_DESCRIBE v21.6.1.1-prestable)
|
||||
SET(VERSION_STRING 21.6.1.1)
|
||||
# end of autochange
|
||||
|
@ -1,35 +1,16 @@
|
||||
option(ENABLE_NANODBC "Enalbe nanodbc" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_NANODBC)
|
||||
set (USE_ODBC 0)
|
||||
return()
|
||||
endif()
|
||||
|
||||
if (NOT ENABLE_ODBC)
|
||||
set (USE_NANODBC 0)
|
||||
message (STATUS "Using nanodbc=${USE_NANODBC}")
|
||||
return()
|
||||
endif()
|
||||
return ()
|
||||
endif ()
|
||||
|
||||
if (NOT USE_INTERNAL_NANODBC_LIBRARY)
|
||||
message (FATAL_ERROR "Only the bundled nanodbc library can be used")
|
||||
endif ()
|
||||
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt")
|
||||
message (WARNING "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal nanodbc library")
|
||||
set (USE_NANODBC 0)
|
||||
return()
|
||||
message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/include")
|
||||
message (ERROR "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal unixodbc needed for nanodbc")
|
||||
set (USE_NANODBC 0)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set (USE_NANODBC 1)
|
||||
|
||||
set (NANODBC_LIBRARY nanodbc)
|
||||
set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbc")
|
||||
|
||||
set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbce")
|
||||
|
||||
message (STATUS "Using nanodbc=${USE_NANODBC}: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
|
||||
message (STATUS "Using unixodbc")
|
||||
message (STATUS "Using nanodbc: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
|
||||
|
@ -50,4 +50,6 @@ if (NOT EXTERNAL_ODBC_LIBRARY_FOUND)
|
||||
set (USE_INTERNAL_ODBC_LIBRARY 1)
|
||||
endif ()
|
||||
|
||||
set (USE_INTERNAL_NANODBC_LIBRARY 1)
|
||||
|
||||
message (STATUS "Using unixodbc")
|
||||
|
@ -171,6 +171,7 @@ elseif (COMPILER_GCC)
|
||||
add_cxx_compile_options(-Wtrampolines)
|
||||
# Obvious
|
||||
add_cxx_compile_options(-Wunused)
|
||||
add_cxx_compile_options(-Wundef)
|
||||
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
|
||||
add_cxx_compile_options(-Wvector-operation-performance)
|
||||
# XXX: libstdc++ has some of these for 3way compare
|
||||
|
5
contrib/CMakeLists.txt
vendored
5
contrib/CMakeLists.txt
vendored
@ -47,6 +47,7 @@ add_subdirectory (lz4-cmake)
|
||||
add_subdirectory (murmurhash)
|
||||
add_subdirectory (replxx-cmake)
|
||||
add_subdirectory (unixodbc-cmake)
|
||||
add_subdirectory (nanodbc-cmake)
|
||||
|
||||
if (USE_INTERNAL_XZ_LIBRARY)
|
||||
add_subdirectory (xz)
|
||||
@ -320,10 +321,6 @@ if (USE_LIBPQXX)
|
||||
add_subdirectory (libpqxx-cmake)
|
||||
endif()
|
||||
|
||||
if (USE_NANODBC)
|
||||
add_subdirectory (nanodbc-cmake)
|
||||
endif()
|
||||
|
||||
if (USE_NURAFT)
|
||||
add_subdirectory(nuraft-cmake)
|
||||
endif()
|
||||
|
2
contrib/datasketches-cpp
vendored
2
contrib/datasketches-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit 45885c0c8c0807bb9480886d60ca7042000a4c43
|
||||
Subproject commit f915d35b2de676683493c86c585141a1e1c83334
|
@ -1,3 +1,7 @@
|
||||
if (NOT USE_INTERNAL_NANODBC_LIBRARY)
|
||||
return ()
|
||||
endif ()
|
||||
|
||||
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/nanodbc)
|
||||
|
||||
if (NOT TARGET unixodbc)
|
||||
|
2
contrib/zlib-ng
vendored
2
contrib/zlib-ng
vendored
@ -1 +1 @@
|
||||
Subproject commit 7f254522fd676ff4e906c6d4e9b30d4df4214c2d
|
||||
Subproject commit 5cc4d232020dc66d1d6c5438834457e2a2f6127b
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (21.5.1.1) unstable; urgency=low
|
||||
clickhouse (21.6.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Fri, 02 Apr 2021 18:34:26 +0300
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 20 Apr 2021 01:48:16 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -308,12 +308,8 @@ function run_tests
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
01798_uniq_theta_sketch
|
||||
01799_long_uniq_theta_sketch
|
||||
collate
|
||||
collation
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
|
@ -17,6 +17,9 @@
|
||||
|
||||
<!-- One NUMA node w/o hyperthreading -->
|
||||
<max_threads>12</max_threads>
|
||||
|
||||
<!-- mmap shows some improvements in perf tests -->
|
||||
<min_bytes_to_use_mmap_io>64Mi</min_bytes_to_use_mmap_io>
|
||||
</default>
|
||||
</profiles>
|
||||
<users>
|
||||
|
@ -104,6 +104,12 @@ clickhouse-client -q "system flush logs" ||:
|
||||
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
|
||||
clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
|
||||
clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
|
||||
clickhouse-client --allow_introspection_functions=1 -q "
|
||||
WITH
|
||||
arrayMap(x -> concat(demangle(addressToSymbol(x)), ':', addressToLine(x)), trace) AS trace_array,
|
||||
arrayStringConcat(trace_array, '\n') AS trace_string
|
||||
SELECT * EXCEPT(trace), trace_string FROM system.trace_log FORMAT TSVWithNamesAndTypes
|
||||
" | pigz > /test_output/trace-log.tsv.gz &
|
||||
wait ||:
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
|
||||
|
@ -136,6 +136,7 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
|
||||
tar -chf /test_output/trace_log_dump.tar /var/lib/clickhouse/data/system/trace_log ||:
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
|
@ -19,26 +19,26 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
|
||||
- `path` — Bucket url with path to file. Supports the following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
|
||||
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
|
||||
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
|
||||
|
||||
**Example:**
|
||||
**Example**
|
||||
|
||||
**1.** Set up the `s3_engine_table` table:
|
||||
1. Set up the `s3_engine_table` table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
|
||||
``` sql
|
||||
CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
|
||||
**2.** Fill file:
|
||||
2. Fill file:
|
||||
|
||||
```sql
|
||||
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
|
||||
``` sql
|
||||
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
|
||||
```
|
||||
|
||||
**3.** Query the data:
|
||||
3. Query the data:
|
||||
|
||||
```sql
|
||||
SELECT * FROM s3_engine_table LIMIT 2
|
||||
``` sql
|
||||
SELECT * FROM s3_engine_table LIMIT 2;
|
||||
```
|
||||
|
||||
```text
|
||||
@ -73,13 +73,63 @@ For more information about virtual columns see [here](../../../engines/table-eng
|
||||
|
||||
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
|
||||
|
||||
## S3-related Settings {#s3-settings}
|
||||
**Example**
|
||||
|
||||
1. Suppose we have several files in CSV format with the following URIs on S3:
|
||||
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’
|
||||
- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’
|
||||
|
||||
There are several ways to make a table consisting of all six files:
|
||||
|
||||
The first way:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
|
||||
```
|
||||
|
||||
Another way:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
|
||||
```
|
||||
|
||||
The table consists of all the files in both directories (all files should match the format and schema described in the query):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
|
||||
```
|
||||
|
||||
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
|
||||
|
||||
**Example**
|
||||
|
||||
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
|
||||
```
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_path` — Path to the file.
|
||||
- `_file` — Name of the file.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
||||
## S3-related settings {#settings}
|
||||
|
||||
The following settings can be set before query execution or placed into configuration file.
|
||||
|
||||
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
|
||||
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
|
||||
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
|
||||
- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`.
|
||||
- `s3_max_redirects` — Max number of S3 redirect hops allowed. Default value is `10`.
|
||||
|
||||
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
|
||||
|
||||
@ -156,5 +206,3 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-
|
||||
## See also
|
||||
|
||||
- [S3 table function](../../../sql-reference/table-functions/s3.md)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) <!--hide-->
|
||||
|
@ -159,6 +159,9 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.<so|dl
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
|
||||
!!! note "Note"
|
||||
You can change the path to the CatBoost model configuration later without restarting the server.
|
||||
|
||||
## 4. Run the Model Inference from SQL {#run-model-inference}
|
||||
|
||||
To test the model, run the ClickHouse client `$ clickhouse client`.
|
||||
|
@ -77,7 +77,8 @@ toc_title: Adopters
|
||||
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
|
||||
| <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
|
||||
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
|
||||
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x
|
||||
| <a href="https://clarity.microsoft.com/" class="favicon">Microsoft</a> | Web Analytics | Clarity (Main Product) | — | — | [A question on GitHub](https://github.com/ClickHouse/ClickHouse/issues/21556) |
|
||||
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |
|
||||
| <a href="https://mux.com/" class="favicon">MUX</a> | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) |
|
||||
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
|
||||
| <a href="https://www.netskope.com/" class="favicon">Netskope</a> | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) |
|
||||
|
@ -4,7 +4,9 @@ Contains information about columns in all the tables.
|
||||
|
||||
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
|
||||
|
||||
The `system.columns` table contains the following columns (the column type is shown in brackets):
|
||||
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` table only in those sessions where they were created. They are shown with an empty `database` field.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
@ -26,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
:) select * from system.columns LIMIT 2 FORMAT Vertical;
|
||||
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
@ -65,8 +67,6 @@ is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
|
||||
2 rows in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->
|
||||
|
@ -1,59 +1,65 @@
|
||||
# system.tables {#system-tables}
|
||||
|
||||
Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
|
||||
Contains metadata of each table that the server knows about.
|
||||
|
||||
This table contains the following columns (the column type is shown in brackets):
|
||||
[Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.
|
||||
|
||||
- `database` (String) — The name of the database the table is in.
|
||||
[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.tables` table only in those sessions where they were created. They are shown with an empty `database` field and with the `is_temporary` flag switched on.
|
||||
|
||||
- `name` (String) — Table name.
|
||||
Columns:
|
||||
|
||||
- `engine` (String) — Table engine name (without parameters).
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
|
||||
|
||||
- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
|
||||
- `data_path` (String) - Path to the table data in the file system.
|
||||
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters).
|
||||
|
||||
- `metadata_path` (String) - Path to the table metadata in the file system.
|
||||
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary.
|
||||
|
||||
- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
|
||||
- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system.
|
||||
|
||||
- `dependencies_database` (Array(String)) - Database dependencies.
|
||||
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
|
||||
|
||||
- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
|
||||
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata.
|
||||
|
||||
- `create_table_query` (String) - The query that was used to create the table.
|
||||
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.
|
||||
|
||||
- `engine_full` (String) - Parameters of the table engine.
|
||||
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
|
||||
|
||||
- `partition_key` (String) - The partition key expression specified in the table.
|
||||
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.
|
||||
|
||||
- `sorting_key` (String) - The sorting key expression specified in the table.
|
||||
- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine.
|
||||
|
||||
- `primary_key` (String) - The primary key expression specified in the table.
|
||||
- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table.
|
||||
|
||||
- `sampling_key` (String) - The sampling key expression specified in the table.
|
||||
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table.
|
||||
|
||||
- `storage_policy` (String) - The storage policy:
|
||||
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table.
|
||||
|
||||
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table.
|
||||
|
||||
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy:
|
||||
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `Null` (including underying `Buffer` table).
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).
|
||||
|
||||
- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `Null` (**does not** includes any underlying storage).
|
||||
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).
|
||||
|
||||
- If the table stores data on disk, returns used space on disk (i.e. compressed).
|
||||
- If the table stores data in memory, returns approximated number of used bytes in memory.
|
||||
|
||||
- `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
|
||||
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables).
|
||||
|
||||
- `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
|
||||
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
|
||||
|
||||
The `system.tables` table is used in `SHOW TABLES` query implementation.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
@ -100,8 +106,6 @@ sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
|
||||
2 rows in set. Elapsed: 0.004 sec.
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->
|
||||
|
@ -29,6 +29,3 @@ $ sudo apt-get update
|
||||
$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
|
||||
$ sudo service clickhouse-server restart
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
@ -27,7 +27,37 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘a
|
||||
|
||||
## -SimpleState {#agg-functions-combinator-simplestate}
|
||||
|
||||
If you apply this combinator, the aggregate function returns the same value but with a different type. This is an `SimpleAggregateFunction(...)` that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines.
|
||||
If you apply this combinator, the aggregate function returns the same value but with a different type. This is a [SimpleAggregateFunction(...)](../../sql-reference/data-types/simpleaggregatefunction.md) that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) tables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
<aggFunction>SimpleState(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Aggregate function parameters.
|
||||
|
||||
**Returned values**
|
||||
|
||||
The value of an aggregate function with the `SimpleAggregateFunction(...)` type.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(c)────────────────────────┬─c─┐
|
||||
│ SimpleAggregateFunction(any, UInt64) │ 0 │
|
||||
└──────────────────────────────────────┴───┘
|
||||
```
|
||||
|
||||
## -State {#agg-functions-combinator-state}
|
||||
|
||||
@ -249,4 +279,3 @@ FROM people
|
||||
└────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
|
@ -6,7 +6,7 @@ toc_priority: 207
|
||||
|
||||
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm.
|
||||
|
||||
The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
|
||||
Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
|
||||
|
||||
The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.
|
||||
|
||||
|
@ -38,4 +38,3 @@ We recommend using this function in almost all scenarios.
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -49,4 +49,3 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -23,4 +23,3 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -37,4 +37,3 @@ We don’t recommend using this function. In most cases, use the [uniq](../../..
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -1,39 +0,0 @@
|
||||
---
|
||||
toc_priority: 195
|
||||
---
|
||||
|
||||
# uniqThetaSketch {#agg_function-uniqthetasketch}
|
||||
|
||||
Calculates the approximate number of different argument values, using the [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html).
|
||||
|
||||
``` sql
|
||||
uniqThetaSketch(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Function:
|
||||
|
||||
- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
|
||||
|
||||
- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values.
|
||||
|
||||
4096(2^12) 64-bit sketch are used. The size of the state is about 41 KB.
|
||||
|
||||
- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
@ -2,6 +2,8 @@
|
||||
|
||||
`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data.
|
||||
|
||||
The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix.
|
||||
|
||||
The following aggregate functions are supported:
|
||||
|
||||
- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
|
||||
|
@ -16,46 +16,60 @@ The following assumptions are made:
|
||||
|
||||
## visitParamHas(params, name) {#visitparamhasparams-name}
|
||||
|
||||
Checks whether there is a field with the ‘name’ name.
|
||||
Checks whether there is a field with the `name` name.
|
||||
|
||||
Alias: `simpleJSONHas`.
|
||||
|
||||
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
|
||||
|
||||
Parses UInt64 from the value of the field named ‘name’. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
|
||||
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
|
||||
|
||||
Alias: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
|
||||
|
||||
The same as for Int64.
|
||||
|
||||
Alias: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
|
||||
|
||||
The same as for Float64.
|
||||
|
||||
Alias: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
|
||||
|
||||
Parses a true/false value. The result is UInt8.
|
||||
|
||||
Alias: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
|
||||
|
||||
Returns the value of a field, including separators.
|
||||
|
||||
Alias: `simpleJSONExtractRaw`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
|
||||
|
||||
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
|
||||
|
||||
Alias: `simpleJSONExtractString`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
```
|
||||
|
||||
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
|
||||
|
@ -1192,6 +1192,109 @@ SELECT defaultValueOfTypeName('Nullable(Int8)')
|
||||
└──────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## indexHint {#indexhint}
|
||||
The function is intended for debugging and introspection purposes. The function ignores its argument and always returns 1. Arguments are not even evaluated.
|
||||
|
||||
However, for the purpose of index analysis, the argument of this function is analyzed as if it were present directly, without being wrapped inside the `indexHint` function. This allows selecting data in index ranges by the corresponding condition but without further filtering by this condition. The index in ClickHouse is sparse, so using `indexHint` will yield more data than specifying the same condition directly.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SELECT * FROM table WHERE indexHint(<expression>)
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
`1`. Type: [UInt8](https://clickhouse.yandex/docs/en/data_types/int_uint/#diapazony-uint).
|
||||
|
||||
**Example**
|
||||
|
||||
Here is an example of test data from the table [ontime](../../getting-started/example-datasets/ontime.md).
|
||||
|
||||
Input table:
|
||||
|
||||
```sql
|
||||
SELECT count() FROM ontime
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┐
|
||||
│ 4276457 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
The table has indexes on the fields `(FlightDate, (Year, FlightDate))`.
|
||||
|
||||
Create a query where the index is not used.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
|
||||
```
|
||||
|
||||
ClickHouse processed the entire table (`Processed 4.28 million rows`).
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-01-01 │ 13970 │
|
||||
│ 2017-01-02 │ 15882 │
|
||||
........................
|
||||
│ 2017-09-28 │ 16411 │
|
||||
│ 2017-09-29 │ 16384 │
|
||||
│ 2017-09-30 │ 12520 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
To apply the index, select a specific date.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
|
||||
```
|
||||
|
||||
By using the index, ClickHouse processed a significantly smaller number of rows (`Processed 32.74 thousand rows`).
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-15 │ 16428 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
Now wrap the expression `k = '2017-09-15'` into the `indexHint` function.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
FROM ontime
|
||||
WHERE indexHint(k = '2017-09-15')
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
```
|
||||
|
||||
ClickHouse used the index in the same way as the previous time (`Processed 32.74 thousand rows`).
|
||||
The expression `k = '2017-09-15'` was not used when generating the result.
|
||||
In this example, the `indexHint` function allows you to see adjacent dates.
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-14 │ 7071 │
|
||||
│ 2017-09-15 │ 16428 │
|
||||
│ 2017-09-16 │ 1077 │
|
||||
│ 2017-09-30 │ 8167 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
## replicate {#other-functions-replicate}
|
||||
|
||||
Creates an array with a single value.
|
||||
|
@ -74,6 +74,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,
|
||||
|
||||
Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.
|
||||
|
||||
!!! warning "Warning"
|
||||
You can’t delete a column if it is referenced by a [materialized view](../../../sql-reference/statements/create/view.md#materialized). An attempt to do so returns an error.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
@ -180,7 +183,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## See Also
|
||||
**See Also**
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
|
@ -50,15 +50,32 @@ Creates a table with the same result as that of the [table function](../../../sq
|
||||
### From SELECT query {#from-select-query}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
|
||||
```
|
||||
|
||||
Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from SELECT.
|
||||
Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from `SELECT`. You can also explicitly specify the column descriptions.
|
||||
|
||||
In all cases, if `IF NOT EXISTS` is specified, the query won’t return an error if the table already exists. In this case, the query won’t do anything.
|
||||
If the table already exists and `IF NOT EXISTS` is specified, the query won’t do anything.
|
||||
|
||||
There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
|
||||
SELECT x, toTypeName(x) FROM t1;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─x─┬─toTypeName(x)─┐
|
||||
│ 1 │ String │
|
||||
└───┴───────────────┘
|
||||
```
|
||||
|
||||
## NULL Or NOT NULL Modifiers {#null-modifiers}
|
||||
|
||||
`NULL` and `NOT NULL` modifiers after data type in column definition allow or do not allow it to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
|
||||
|
@ -18,7 +18,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
|
||||
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
|
||||
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -48,6 +48,14 @@ toc_title: "Введение"
|
||||
|
||||
Движки семейства:
|
||||
|
||||
- [Kafka](integrations/kafka.md#kafka)
|
||||
- [MySQL](integrations/mysql.md#mysql)
|
||||
- [ODBC](integrations/odbc.md#table-engine-odbc)
|
||||
- [JDBC](integrations/jdbc.md#table-engine-jdbc)
|
||||
- [S3](integrations/s3.md#table-engine-s3)
|
||||
|
||||
### Специальные движки {#spetsialnye-dvizhki}
|
||||
|
||||
- [ODBC](../../engines/table-engines/integrations/odbc.md)
|
||||
- [JDBC](../../engines/table-engines/integrations/jdbc.md)
|
||||
- [MySQL](../../engines/table-engines/integrations/mysql.md)
|
||||
@ -84,4 +92,3 @@ toc_title: "Введение"
|
||||
Чтобы получить данные из виртуального столбца, необходимо указать его название в запросе `SELECT`. `SELECT *` не отображает данные из виртуальных столбцов.
|
||||
|
||||
При создании таблицы со столбцом, имя которого совпадает с именем одного из виртуальных столбцов таблицы, виртуальный столбец становится недоступным. Не делайте так. Чтобы помочь избежать конфликтов, имена виртуальных столбцов обычно предваряются подчеркиванием.
|
||||
|
||||
|
@ -19,7 +19,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
|
||||
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [ниже](#wildcards-in-path).
|
||||
- `format` — [формат](../../../interfaces/formats.md#formats) файла.
|
||||
- `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
|
||||
- `compression` — тип сжатия. Возможные значения: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
|
||||
|
||||
**Пример**
|
||||
|
||||
@ -73,17 +73,17 @@ SELECT * FROM s3_engine_table LIMIT 2;
|
||||
|
||||
Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.
|
||||
|
||||
## Настройки конечных точек {#endpoint-settings}
|
||||
## Настройки точки приема запроса {#endpoint-settings}
|
||||
|
||||
Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
|
||||
Для точки приема запроса (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
|
||||
|
||||
Обязательная настройка:
|
||||
- `endpoint` — указывает префикс конечной точки.
|
||||
- `endpoint` — указывает префикс точки приема запроса.
|
||||
|
||||
Необязательные настройки:
|
||||
- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой.
|
||||
- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`.
|
||||
- `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз.
|
||||
- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной точкой приема запроса.
|
||||
- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной точки приема запроса. Значение по умолчанию - `false`.
|
||||
- `header` — добавляет указанный HTTP-заголовок к запросу на заданную точку приема запроса. Может быть определен несколько раз.
|
||||
- `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C.
|
||||
|
||||
**Пример**
|
||||
@ -133,8 +133,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
|
||||
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
|
||||
```
|
||||
|
||||
!!! warning "Warning"
|
||||
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
|
||||
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
|
||||
|
||||
4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
|
||||
|
||||
@ -145,6 +144,3 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-
|
||||
**Смотрите также**
|
||||
|
||||
- [Табличная функция S3](../../../sql-reference/table-functions/s3.md)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) <!--hide-->
|
||||
|
||||
|
@ -158,7 +158,9 @@ FROM amazon_train
|
||||
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
|
||||
!!! note "Примечание"
|
||||
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
|
||||
|
||||
## 4. Запустите вывод модели из SQL {#run-model-inference}
|
||||
|
||||
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.
|
||||
|
||||
Таблица `system.columns` содержит столбцы (тип столбца указан в скобках):
|
||||
Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
@ -23,3 +25,46 @@
|
||||
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки.
|
||||
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
database: system
|
||||
table: aggregate_function_combinators
|
||||
name: name
|
||||
type: String
|
||||
default_kind:
|
||||
default_expression:
|
||||
data_compressed_bytes: 0
|
||||
data_uncompressed_bytes: 0
|
||||
marks_bytes: 0
|
||||
comment:
|
||||
is_in_partition_key: 0
|
||||
is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
database: system
|
||||
table: aggregate_function_combinators
|
||||
name: is_internal
|
||||
type: UInt8
|
||||
default_kind:
|
||||
default_expression:
|
||||
data_compressed_bytes: 0
|
||||
data_uncompressed_bytes: 0
|
||||
marks_bytes: 0
|
||||
comment:
|
||||
is_in_partition_key: 0
|
||||
is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
```
|
||||
|
@ -1,39 +1,94 @@
|
||||
# system.tables {#system-tables}
|
||||
|
||||
Содержит метаданные каждой таблицы, о которой знает сервер. Отсоединённые таблицы не отображаются в `system.tables`.
|
||||
Содержит метаданные каждой таблицы, о которой знает сервер.
|
||||
|
||||
Эта таблица содержит следующие столбцы (тип столбца показан в скобках):
|
||||
Отсоединённые таблицы ([DETACH](../../sql-reference/statements/detach.md)) не отображаются в `system.tables`.
|
||||
|
||||
- `database String` — имя базы данных, в которой находится таблица.
|
||||
- `name` (String) — имя таблицы.
|
||||
- `engine` (String) — движок таблицы (без параметров).
|
||||
- `is_temporary` (UInt8) — флаг, указывающий на то, временная это таблица или нет.
|
||||
- `data_path` (String) — путь к данным таблицы в файловой системе.
|
||||
- `metadata_path` (String) — путь к табличным метаданным в файловой системе.
|
||||
- `metadata_modification_time` (DateTime) — время последней модификации табличных метаданных.
|
||||
- `dependencies_database` (Array(String)) — зависимости базы данных.
|
||||
- `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
|
||||
- `create_table_query` (String) — запрос, которым создавалась таблица.
|
||||
- `engine_full` (String) — параметры табличного движка.
|
||||
- `partition_key` (String) — ключ партиционирования таблицы.
|
||||
- `sorting_key` (String) — ключ сортировки таблицы.
|
||||
- `primary_key` (String) - первичный ключ таблицы.
|
||||
- `sampling_key` (String) — ключ сэмплирования таблицы.
|
||||
- `storage_policy` (String) - политика хранения данных:
|
||||
Информация о [временных таблицах](../../sql-reference/statements/create/table.md#temporary-tables) содержится в `system.tables` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких таблиц пустое, а флаг `is_temporary` включен.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
- `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров).
|
||||
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет.
|
||||
- `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе.
|
||||
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе.
|
||||
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время последней модификации табличных метаданных.
|
||||
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — зависимости базы данных.
|
||||
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
|
||||
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) — запрос, при помощи которого создавалась таблица.
|
||||
- `engine_full` ([String](../../sql-reference/data-types/string.md)) — параметры табличного движка.
|
||||
- `partition_key` ([String](../../sql-reference/data-types/string.md)) — ключ партиционирования таблицы.
|
||||
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) — ключ сортировки таблицы.
|
||||
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - первичный ключ таблицы.
|
||||
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) — ключ сэмплирования таблицы.
|
||||
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - политика хранения данных:
|
||||
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` (Nullable(UInt64)) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `NULL` (включая базовую таблицу `Buffer`).
|
||||
|
||||
- `total_bytes` (Nullable(UInt64)) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
|
||||
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `NULL` (не включает в себя никакого базового хранилища).
|
||||
|
||||
- Если таблица хранит данные на диске, возвращает используемое пространство на диске (т. е. сжатое).
|
||||
- Если таблица хранит данные в памяти, возвращает приблизительное количество используемых байт в памяти.
|
||||
|
||||
- `lifetime_rows` (Nullable(UInt64)) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
- `lifetime_bytes` (Nullable(UInt64)) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
database: system
|
||||
name: aggregate_function_combinators
|
||||
uuid: 00000000-0000-0000-0000-000000000000
|
||||
engine: SystemAggregateFunctionCombinators
|
||||
is_temporary: 0
|
||||
data_paths: []
|
||||
metadata_path: /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
|
||||
metadata_modification_time: 1970-01-01 03:00:00
|
||||
dependencies_database: []
|
||||
dependencies_table: []
|
||||
create_table_query:
|
||||
engine_full:
|
||||
partition_key:
|
||||
sorting_key:
|
||||
primary_key:
|
||||
sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
database: system
|
||||
name: asynchronous_metrics
|
||||
uuid: 00000000-0000-0000-0000-000000000000
|
||||
engine: SystemAsynchronousMetrics
|
||||
is_temporary: 0
|
||||
data_paths: []
|
||||
metadata_path: /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
|
||||
metadata_modification_time: 1970-01-01 03:00:00
|
||||
dependencies_database: []
|
||||
dependencies_table: []
|
||||
create_table_query:
|
||||
engine_full:
|
||||
partition_key:
|
||||
sorting_key:
|
||||
primary_key:
|
||||
sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
```
|
||||
|
@ -29,5 +29,3 @@ $ sudo apt-get update
|
||||
$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
|
||||
$ sudo service clickhouse-server restart
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/update/) <!--hide-->
|
||||
|
@ -27,6 +27,40 @@ toc_title: "Комбинаторы агрегатных функций"
|
||||
|
||||
Комбинаторы -If и -Array можно сочетать. При этом, должен сначала идти Array, а потом If. Примеры: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Из-за такого порядка получается, что аргумент cond не должен быть массивом.
|
||||
|
||||
## -SimpleState {#agg-functions-combinator-simplestate}
|
||||
|
||||
При использовании этого комбинатора агрегатная функция возвращает то же значение, но типа [SimpleAggregateFunction(...)](../../sql-reference/data-types/simpleaggregatefunction.md). Текущее значение функции может храниться в таблице для последующей работы с таблицами семейства [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
<aggFunction>SimpleState(x)
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `x` — параметры агрегатной функции.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Значение агрегатной функции типа `SimpleAggregateFunction(...)`.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(c)────────────────────────┬─c─┐
|
||||
│ SimpleAggregateFunction(any, UInt64) │ 0 │
|
||||
└──────────────────────────────────────┴───┘
|
||||
```
|
||||
|
||||
## -State {#state}
|
||||
|
||||
В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции [uniq](reference/uniq.md#agg_function-uniq) — количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции `uniq` — хэш-таблицу для расчёта количества уникальных значений), которое имеет тип `AggregateFunction(...)` и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации.
|
||||
@ -247,4 +281,3 @@ FROM people
|
||||
│ [3,2] │ [11.5,12.949999809265137] │
|
||||
└────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк,
|
||||
а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется.
|
||||
|
||||
Чтобы получить промежуточное значение, обычно используются агрегатные функции с суффиксом [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate).
|
||||
|
||||
Поддерживаются следующие агрегатные функции:
|
||||
|
||||
- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
|
||||
|
@ -16,51 +16,65 @@ toc_title: JSON
|
||||
|
||||
## visitParamHas(params, name) {#visitparamhasparams-name}
|
||||
|
||||
Проверить наличие поля с именем name.
|
||||
Проверяет наличие поля с именем `name`.
|
||||
|
||||
Алиас: `simpleJSONHas`.
|
||||
|
||||
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
|
||||
|
||||
Распарсить UInt64 из значения поля с именем name. Если поле строковое - попытаться распарсить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то вернуть 0.
|
||||
Пытается выделить число типа UInt64 из значения поля с именем `name`. Если поле строковое, пытается выделить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то возвращает 0.
|
||||
|
||||
Алиас: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
|
||||
|
||||
Аналогично для Int64.
|
||||
|
||||
Алиас: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
|
||||
|
||||
Аналогично для Float64.
|
||||
|
||||
Алиас: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
|
||||
|
||||
Распарсить значение true/false. Результат - UInt8.
|
||||
Пытается выделить значение true/false. Результат — UInt8.
|
||||
|
||||
Алиас: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
|
||||
|
||||
Вернуть значение поля, включая разделители.
|
||||
Возвращает значение поля, включая разделители.
|
||||
|
||||
Алиас: `simpleJSONExtractRaw`.
|
||||
|
||||
Примеры:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
|
||||
|
||||
Распарсить строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
|
||||
Разбирает строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
|
||||
|
||||
Алиас: `simpleJSONExtractString`.
|
||||
|
||||
Примеры:
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
```
|
||||
|
||||
На данный момент, не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
|
||||
На данный момент не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
|
||||
|
||||
Следующие функции используют [simdjson](https://github.com/lemire/simdjson) который разработан под более сложные требования для разбора JSON. Упомянутое выше предположение 2 по-прежнему применимо.
|
||||
Следующие функции используют [simdjson](https://github.com/lemire/simdjson), который разработан под более сложные требования для разбора JSON. Упомянутое выше допущение 2 по-прежнему применимо.
|
||||
|
||||
## isValidJSON(json) {#isvalidjsonjson}
|
||||
|
||||
@ -292,4 +306,3 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
|
||||
│ [('d','"hello"'),('f','"world"')] │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -1133,6 +1133,111 @@ SELECT defaultValueOfTypeName('Nullable(Int8)')
|
||||
└──────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## indexHint {#indexhint}
|
||||
Возвращает все данные из диапазона, в который попадают данные, соответствующие указанному выражению.
|
||||
Переданное выражение не будет вычислено. Выбор диапазона производится по индексу.
|
||||
Индекс в ClickHouse разреженный, поэтому при чтении диапазона в ответ попадают «лишние» соседние данные.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
SELECT * FROM table WHERE indexHint(<expression>)
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Возвращает диапазон индекса, в котором выполняется заданное условие.
|
||||
|
||||
Тип: [UInt8](https://clickhouse.yandex/docs/ru/data_types/int_uint/#diapazony-uint).
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим пример с использованием тестовых данных таблицы [ontime](../../getting-started/example-datasets/ontime.md).
|
||||
|
||||
Исходная таблица:
|
||||
|
||||
```sql
|
||||
SELECT count() FROM ontime
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┐
|
||||
│ 4276457 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
В таблице есть индексы по полям `(FlightDate, (Year, FlightDate))`.
|
||||
|
||||
Выполним выборку по дате, где индекс не используется.
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
|
||||
```
|
||||
|
||||
ClickHouse обработал всю таблицу (`Processed 4.28 million rows`).
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-01-01 │ 13970 │
|
||||
│ 2017-01-02 │ 15882 │
|
||||
........................
|
||||
│ 2017-09-28 │ 16411 │
|
||||
│ 2017-09-29 │ 16384 │
|
||||
│ 2017-09-30 │ 12520 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
Для подключения индекса выбираем конкретную дату.
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
|
||||
```
|
||||
|
||||
При использовании индекса ClickHouse обработал значительно меньшее количество строк (`Processed 32.74 thousand rows`).
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-15 │ 16428 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
Передадим в функцию `indexHint` выражение `k = '2017-09-15'`.
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
FROM ontime
|
||||
WHERE indexHint(k = '2017-09-15')
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
```
|
||||
|
||||
ClickHouse применил индекс по аналогии с примером выше (`Processed 32.74 thousand rows`).
|
||||
Выражение `k = '2017-09-15'` не используется при формировании результата.
|
||||
Функция `indexHint` позволяет увидеть соседние данные.
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-14 │ 7071 │
|
||||
│ 2017-09-15 │ 16428 │
|
||||
│ 2017-09-16 │ 1077 │
|
||||
│ 2017-09-30 │ 8167 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
|
||||
## replicate {#other-functions-replicate}
|
||||
|
||||
Создает массив, заполненный одним значением.
|
||||
|
@ -63,6 +63,9 @@ DROP COLUMN [IF EXISTS] name
|
||||
|
||||
Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно.
|
||||
|
||||
!!! warning "Предупреждение"
|
||||
Вы не можете удалить столбец, используемый в [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
@ -155,7 +158,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## Смотрите также
|
||||
**Смотрите также**
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
|
@ -46,15 +46,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
|
||||
### Из запроса SELECT {#from-select-query}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
|
||||
```
|
||||
|
||||
Создаёт таблицу со структурой, как результат запроса `SELECT`, с движком engine, и заполняет её данными из SELECT-а.
|
||||
Создаёт таблицу со структурой, как у результата запроса `SELECT`, с движком `engine` и заполняет её данными из `SELECT`. Также вы можете явно задать описание столбцов.
|
||||
|
||||
Во всех случаях, если указано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если таблица уже существует. В этом случае, запрос будет ничего не делать.
|
||||
Если таблица уже существует и указано `IF NOT EXISTS`, то запрос ничего не делает.
|
||||
|
||||
После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../../engines/table-engines/index.md#table_engines).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
|
||||
SELECT x, toTypeName(x) FROM t1;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─x─┬─toTypeName(x)─┐
|
||||
│ 1 │ String │
|
||||
└───┴───────────────┘
|
||||
```
|
||||
|
||||
## Модификатор NULL или NOT NULL {#null-modifiers}
|
||||
|
||||
Модификатор `NULL` или `NOT NULL`, указанный после типа данных в определении столбца, позволяет или не позволяет типу данных быть [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
|
||||
@ -230,7 +247,7 @@ CREATE TABLE codec_example
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
```
|
||||
## Временные таблицы {#vremennye-tablitsy}
|
||||
## Временные таблицы {#temporary-tables}
|
||||
|
||||
ClickHouse поддерживает временные таблицы со следующими характеристиками:
|
||||
|
||||
|
@ -18,7 +18,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
|
||||
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. Подробнее смотри [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
- `format` — [формат](../../interfaces/formats.md#formats) файла.
|
||||
- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр.
|
||||
- `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Необязательный параметр.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
|
@ -477,6 +477,103 @@ FROM
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
|
||||
|
||||
## indexHint {#indexhint}
|
||||
输出符合索引选择范围内的所有数据,同时不使用参数中的表达式进行过滤。
|
||||
|
||||
传递给函数的表达式参数将不会被计算,但ClickHouse使用参数中的表达式进行索引过滤。
|
||||
|
||||
**返回值**
|
||||
|
||||
- 1。
|
||||
|
||||
**示例**
|
||||
|
||||
这是一个包含[ontime](../../getting-started/example-datasets/ontime.md)测试数据集的测试表。
|
||||
|
||||
```
|
||||
SELECT count() FROM ontime
|
||||
|
||||
┌─count()─┐
|
||||
│ 4276457 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
该表使用`(FlightDate, (Year, FlightDate))`作为索引。
|
||||
|
||||
对该表进行如下的查询:
|
||||
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
|
||||
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
FROM ontime
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-01-01 │ 13970 │
|
||||
│ 2017-01-02 │ 15882 │
|
||||
........................
|
||||
│ 2017-09-28 │ 16411 │
|
||||
│ 2017-09-29 │ 16384 │
|
||||
│ 2017-09-30 │ 12520 │
|
||||
└────────────┴─────────┘
|
||||
|
||||
273 rows in set. Elapsed: 0.072 sec. Processed 4.28 million rows, 8.55 MB (59.00 million rows/s., 118.01 MB/s.)
|
||||
```
|
||||
|
||||
在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询:
|
||||
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
|
||||
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
FROM ontime
|
||||
WHERE k = '2017-09-15'
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-15 │ 16428 │
|
||||
└────────────┴─────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.014 sec. Processed 32.74 thousand rows, 65.49 KB (2.31 million rows/s., 4.63 MB/s.)
|
||||
```
|
||||
|
||||
在最后一行的显示中,通过索引ClickHouse处理的行数明显减少(`Processed 32.74 thousand rows`)。
|
||||
|
||||
现在将表达式`k = '2017-09-15'`传递给`indexHint`函数:
|
||||
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
|
||||
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
FROM ontime
|
||||
WHERE indexHint(k = '2017-09-15')
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
|
||||
┌──────────k─┬─count()─┐
|
||||
│ 2017-09-14 │ 7071 │
|
||||
│ 2017-09-15 │ 16428 │
|
||||
│ 2017-09-16 │ 1077 │
|
||||
│ 2017-09-30 │ 8167 │
|
||||
└────────────┴─────────┘
|
||||
|
||||
4 rows in set. Elapsed: 0.004 sec. Processed 32.74 thousand rows, 65.49 KB (8.97 million rows/s., 17.94 MB/s.)
|
||||
```
|
||||
|
||||
对于这个查询,从输出可以看到,ClickHouse以与上一次相同的方式应用了索引(`Processed 32.74 thousand rows`)。但是,最终返回的结果集并没有根据`k = '2017-09-15'`表达式进行过滤。
|
||||
|
||||
由于ClickHouse中使用稀疏索引,因此在读取范围时(本示例中为相邻日期),"额外"的数据将包含在索引结果中。使用`indexHint`函数可以查看到它们。
|
||||
|
||||
## 复制 {#replicate}
|
||||
|
||||
使用单个值填充一个数组。
|
||||
|
@ -33,8 +33,12 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
|
||||
# https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/
|
||||
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
if (ENABLE_ODBC)
|
||||
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
else ()
|
||||
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver" OFF)
|
||||
endif ()
|
||||
|
||||
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source"
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
|
11
programs/server/.gitignore
vendored
11
programs/server/.gitignore
vendored
@ -1,8 +1,11 @@
|
||||
/access
|
||||
/dictionaries_lib
|
||||
/flags
|
||||
/format_schemas
|
||||
/metadata
|
||||
/metadata_dropped
|
||||
/data
|
||||
/store
|
||||
/access
|
||||
/flags
|
||||
/dictionaries_lib
|
||||
/format_schemas
|
||||
/preprocessed_configs
|
||||
/shadow
|
||||
/tmp
|
||||
|
@ -19,6 +19,7 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
clickhouse_storages_system
|
||||
clickhouse_table_functions
|
||||
string_utils
|
||||
jemalloc
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
|
||||
|
@ -101,6 +101,10 @@
|
||||
# include <Server/KeeperTCPHandlerFactory.h>
|
||||
#endif
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric Revision;
|
||||
@ -109,11 +113,35 @@ namespace CurrentMetrics
|
||||
extern const Metric MaxDDLEntryID;
|
||||
}
|
||||
|
||||
#if USE_JEMALLOC
|
||||
/// Returns the current value of the boolean jemalloc option `name`, queried through mallctl().
/// Throws Poco::SystemException if mallctl() returns a non-zero (error) status.
static bool jemallocOptionEnabled(const char *name)
|
||||
{
|
||||
bool value;
|
||||
size_t size = sizeof(value);
|
||||
|
||||
/// Only the old-value output arguments are supplied (newp is nullptr, newlen is 0),
/// so the call reads the option into `value` without passing a new value.
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
|
||||
throw Poco::SystemException("mallctl() failed");
|
||||
|
||||
return value;
|
||||
}
|
||||
#else
|
||||
/// Fallback used when USE_JEMALLOC is not set: the option name is ignored and no option is ever reported as enabled.
static bool jemallocOptionEnabled(const char * /* name */) { return false; }
|
||||
#endif
|
||||
|
||||
|
||||
int mainEntryClickHouseServer(int argc, char ** argv)
|
||||
{
|
||||
DB::Server app;
|
||||
|
||||
if (jemallocOptionEnabled("opt.background_thread"))
|
||||
{
|
||||
LOG_ERROR(&app.logger(),
|
||||
"jemalloc.background_thread was requested, "
|
||||
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
|
||||
"and also background_thread is not compatible with ClickHouse watchdog "
|
||||
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
|
||||
}
|
||||
|
||||
/// Do not fork separate process from watchdog if we attached to terminal.
|
||||
/// Otherwise it breaks gdb usage.
|
||||
/// Can be overridden by environment variable (cannot use server config at this moment).
|
||||
|
3
programs/server/data/.gitignore
vendored
3
programs/server/data/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
*.txt
|
||||
*.dat
|
||||
*.idx
|
1
programs/server/metadata/.gitignore
vendored
1
programs/server/metadata/.gitignore
vendored
@ -1 +0,0 @@
|
||||
*.sql
|
@ -132,12 +132,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
|
||||
|
||||
factory.registerFunction("uniqExact",
|
||||
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
|
||||
|
||||
#if USE_DATASKETCHES
|
||||
factory.registerFunction("uniqThetaSketch",
|
||||
{createAggregateFunctionUniq<AggregateFunctionUniqThetaSketchData, AggregateFunctionUniqThetaSketchData>, properties});
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,7 +22,6 @@
|
||||
|
||||
#include <AggregateFunctions/UniquesHashSet.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/ThetaSketchData.h>
|
||||
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||
|
||||
|
||||
@ -125,19 +124,6 @@ struct AggregateFunctionUniqExactData<String>
|
||||
};
|
||||
|
||||
|
||||
/// uniqThetaSketch
|
||||
#if USE_DATASKETCHES
|
||||
|
||||
struct AggregateFunctionUniqThetaSketchData
|
||||
{
|
||||
using Set = ThetaSketchData<UInt64>;
|
||||
Set set;
|
||||
|
||||
static String getName() { return "uniqThetaSketch"; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
@ -203,12 +189,6 @@ struct OneAdder
|
||||
data.set.insert(key);
|
||||
}
|
||||
}
|
||||
#if USE_DATASKETCHES
|
||||
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqThetaSketchData>)
|
||||
{
|
||||
data.set.insertOriginal(column.getDataAt(row_num));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
@ -280,7 +280,7 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -163,7 +163,7 @@ public:
|
||||
sorted = false;
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wclass-memaccess"
|
||||
#endif
|
||||
@ -191,7 +191,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -1,119 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_DATASKETCHES
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <memory>
|
||||
#include <theta_sketch.hpp> // Y_IGNORE
|
||||
#include <theta_union.hpp> // Y_IGNORE
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
template <typename Key>
|
||||
class ThetaSketchData : private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
|
||||
std::unique_ptr<datasketches::theta_union> sk_union;
|
||||
|
||||
inline datasketches::update_theta_sketch * getSkUpdate()
|
||||
{
|
||||
if (!sk_update)
|
||||
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
|
||||
return sk_update.get();
|
||||
}
|
||||
|
||||
inline datasketches::theta_union * getSkUnion()
|
||||
{
|
||||
if (!sk_union)
|
||||
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());
|
||||
return sk_union.get();
|
||||
}
|
||||
|
||||
public:
|
||||
using value_type = Key;
|
||||
|
||||
ThetaSketchData() = default;
|
||||
~ThetaSketchData() = default;
|
||||
|
||||
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
|
||||
void insertOriginal(const StringRef & value)
|
||||
{
|
||||
getSkUpdate()->update(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
|
||||
void insert(Key value)
|
||||
{
|
||||
getSkUpdate()->update(value);
|
||||
}
|
||||
|
||||
UInt64 size() const
|
||||
{
|
||||
if (sk_union)
|
||||
return static_cast<UInt64>(sk_union->get_result().get_estimate());
|
||||
else if (sk_update)
|
||||
return static_cast<UInt64>(sk_update->get_estimate());
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void merge(const ThetaSketchData & rhs)
|
||||
{
|
||||
datasketches::theta_union * u = getSkUnion();
|
||||
|
||||
if (sk_update)
|
||||
{
|
||||
u->update(*sk_update);
|
||||
sk_update.reset(nullptr);
|
||||
}
|
||||
|
||||
if (rhs.sk_update)
|
||||
u->update(*rhs.sk_update);
|
||||
else if (rhs.sk_union)
|
||||
u->update(rhs.sk_union->get_result());
|
||||
}
|
||||
|
||||
/// You can only call for an empty object.
|
||||
void read(DB::ReadBuffer & in)
|
||||
{
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
readVectorBinary(bytes, in);
|
||||
if (!bytes.empty())
|
||||
{
|
||||
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
||||
getSkUnion()->update(sk);
|
||||
}
|
||||
}
|
||||
|
||||
void write(DB::WriteBuffer & out) const
|
||||
{
|
||||
if (sk_update)
|
||||
{
|
||||
auto bytes = sk_update->compact().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else if (sk_union)
|
||||
{
|
||||
auto bytes = sk_union->get_result().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -50,6 +50,7 @@ SRCS(
|
||||
AggregateFunctionStatisticsSimple.cpp
|
||||
AggregateFunctionStudentTTest.cpp
|
||||
AggregateFunctionSum.cpp
|
||||
AggregateFunctionSumCount.cpp
|
||||
AggregateFunctionSumMap.cpp
|
||||
AggregateFunctionTopK.cpp
|
||||
AggregateFunctionUniq.cpp
|
||||
|
@ -13,6 +13,7 @@ namespace ErrorCodes
|
||||
extern const int MISMATCH_REPLICAS_DATA_SOURCES;
|
||||
extern const int NO_AVAILABLE_REPLICA;
|
||||
extern const int TIMEOUT_EXCEEDED;
|
||||
extern const int UNKNOWN_PACKET_FROM_SERVER;
|
||||
}
|
||||
|
||||
|
||||
@ -278,7 +279,22 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
Packet packet;
|
||||
{
|
||||
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
|
||||
packet = current_connection->receivePacket();
|
||||
|
||||
try
|
||||
{
|
||||
packet = current_connection->receivePacket();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
|
||||
{
|
||||
/// Exception may happen when packet is received, e.g. when got unknown packet.
|
||||
/// In this case, invalidate replica, so that we would not read from it anymore.
|
||||
current_connection->disconnect();
|
||||
invalidateReplica(state);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
switch (packet.type)
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
}
|
||||
|
||||
/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -128,7 +128,7 @@ public:
|
||||
offsets.push_back(new_size);
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -277,7 +277,7 @@ private:
|
||||
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
|
||||
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
|
||||
*/
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
|
||||
#endif
|
||||
@ -359,6 +359,6 @@ extern template class Allocator<true, false>;
|
||||
extern template class Allocator<false, true>;
|
||||
extern template class Allocator<true, true>;
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -19,7 +19,7 @@ namespace DB
|
||||
struct UInt128
|
||||
{
|
||||
/// Suppress gcc7 warnings: 'prev_key.DB::UInt128::low' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -92,7 +92,7 @@ struct UInt128
|
||||
return static_cast<T>(low);
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -150,7 +150,7 @@ struct DummyUInt256
|
||||
{
|
||||
|
||||
/// Suppress gcc7 warnings: 'prev_key.DB::UInt256::a' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -179,7 +179,7 @@ struct DummyUInt256
|
||||
bool operator== (const UInt64 rhs) const { return a == rhs && b == 0 && c == 0 && d == 0; }
|
||||
bool operator!= (const UInt64 rhs) const { return !operator==(rhs); }
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -15,4 +15,3 @@
|
||||
#cmakedefine01 USE_GRPC
|
||||
#cmakedefine01 USE_STATS
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 USE_DATASKETCHES
|
||||
|
@ -181,7 +181,7 @@ std::vector<std::pair<String, uint16_t>> parseRemoteDescriptionForExternalDataba
|
||||
size_t colon = address.find(':');
|
||||
if (colon == String::npos)
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("ParseRemoteDescription"), "Port is not found for host: {}. Using default port {}", default_port);
|
||||
LOG_WARNING(&Poco::Logger::get("ParseRemoteDescription"), "Port is not found for host: {}. Using default port {}", address, default_port);
|
||||
result.emplace_back(std::make_pair(address, default_port));
|
||||
}
|
||||
else
|
||||
|
@ -1,5 +1,5 @@
|
||||
/// Bug in GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
@ -263,6 +263,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -69,7 +69,7 @@ static void aggregate1(Map & map, Source::const_iterator begin, Source::const_it
|
||||
++map[*it];
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -122,7 +122,7 @@ static void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source:
|
||||
}
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -62,7 +62,7 @@ struct AggregateIndependent
|
||||
}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -115,7 +115,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
|
||||
}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -265,7 +265,7 @@ struct Creator
|
||||
void operator()(Value &) const {}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -275,7 +275,7 @@ struct Updater
|
||||
void operator()(Value & x) const { ++x; }
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -484,7 +484,7 @@ DataTypes Block::getDataTypes() const
|
||||
|
||||
|
||||
template <typename ReturnType>
|
||||
static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
|
||||
static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_remove_constants)
|
||||
{
|
||||
auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
|
||||
{
|
||||
@ -515,7 +515,16 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
|
||||
if (!actual.column || !expected.column)
|
||||
continue;
|
||||
|
||||
if (actual.column->getName() != expected.column->getName())
|
||||
const IColumn * actual_column = actual.column.get();
|
||||
|
||||
/// If we allow to remove constants, and expected column is not const, then unwrap actual constant column.
|
||||
if (allow_remove_constants && !isColumnConst(*expected.column))
|
||||
{
|
||||
if (const auto * column_const = typeid_cast<const ColumnConst *>(actual_column))
|
||||
actual_column = &column_const->getDataColumn();
|
||||
}
|
||||
|
||||
if (actual_column->getName() != expected.column->getName())
|
||||
return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n"
|
||||
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
@ -537,13 +546,25 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
|
||||
|
||||
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs)
|
||||
{
|
||||
return checkBlockStructure<bool>(lhs, rhs, {});
|
||||
return checkBlockStructure<bool>(lhs, rhs, {}, false);
|
||||
}
|
||||
|
||||
|
||||
void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
|
||||
{
|
||||
checkBlockStructure<void>(lhs, rhs, context_description);
|
||||
checkBlockStructure<void>(lhs, rhs, context_description, false);
|
||||
}
|
||||
|
||||
|
||||
/// Returns the result of checkBlockStructure<bool> for `actual` versus `desired`, called with an
/// empty context description and allow_remove_constants = true.
/// With that flag set, a constant column in `actual` is unwrapped to its data column before the
/// comparison whenever the corresponding column in `desired` is not constant.
bool isCompatibleHeader(const Block & actual, const Block & desired)
|
||||
{
|
||||
return checkBlockStructure<bool>(actual, desired, {}, true);
|
||||
}
|
||||
|
||||
|
||||
/// Same check as isCompatibleHeader, but instantiated as checkBlockStructure<void> and given
/// `context_description`, which checkBlockStructure embeds in its "Block structure mismatch in ..."
/// message.
/// NOTE(review): the void instantiation presumably throws on mismatch instead of returning false;
/// the error handler body is not visible here - confirm.
void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description)
|
||||
{
|
||||
checkBlockStructure<void>(actual, desired, context_description, true);
|
||||
}
|
||||
|
||||
|
||||
|
@ -184,6 +184,12 @@ bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
|
||||
/// Throw exception when blocks are different.
|
||||
void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description);
|
||||
|
||||
/// The actual header is compatible with the desired one if the blocks have equal structure except for constants.
|
||||
/// A column may be constant in the actual header while the corresponding column in the desired header is not.
|
||||
/// If both columns are constant, it is checked that they have the same value.
|
||||
bool isCompatibleHeader(const Block & actual, const Block & desired);
|
||||
void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description);
|
||||
|
||||
/// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns.
|
||||
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);
|
||||
|
||||
|
@ -96,7 +96,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
|
||||
template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -159,7 +159,7 @@ private:
|
||||
T dec;
|
||||
UInt32 scale;
|
||||
};
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -563,7 +563,7 @@ public:
|
||||
{
|
||||
case Types::Null: return f(field.template get<Null>());
|
||||
// gcc 8.2.1
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -583,7 +583,7 @@ public:
|
||||
case Types::Int128: return f(field.template get<Int128>());
|
||||
case Types::UInt256: return f(field.template get<UInt256>());
|
||||
case Types::Int256: return f(field.template get<Int256>());
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
@ -70,6 +70,7 @@ class IColumn;
|
||||
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
|
||||
M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
|
||||
M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
|
||||
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
|
||||
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
|
||||
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
|
||||
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
|
||||
@ -142,7 +143,7 @@ class IColumn;
|
||||
M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
|
||||
\
|
||||
M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(UInt64, min_bytes_to_use_mmap_io, (64 * 1024 * 1024), "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
|
||||
\
|
||||
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
|
||||
@ -224,6 +225,7 @@ class IColumn;
|
||||
/** Settings for testing hedged requests */ \
|
||||
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
|
||||
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
|
||||
M(UInt64, unknown_packet_in_send_data, 0, "Send unknown packet instead of data Nth data packet", 0) \
|
||||
\
|
||||
M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
|
||||
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
|
||||
@ -446,6 +448,8 @@ class IColumn;
|
||||
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
|
||||
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
|
||||
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
|
||||
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
\
|
||||
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||
\
|
||||
|
@ -15,7 +15,7 @@ namespace DB
|
||||
struct Null {};
|
||||
|
||||
/// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
@ -59,7 +59,7 @@ enum class TypeIndex
|
||||
LowCardinality,
|
||||
Map,
|
||||
};
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -19,9 +19,12 @@ public:
|
||||
TimezoneMixin(const TimezoneMixin &) = default;
|
||||
|
||||
const DateLUTImpl & getTimeZone() const { return time_zone; }
|
||||
bool hasExplicitTimeZone() const { return has_explicit_time_zone; }
|
||||
|
||||
protected:
|
||||
/// true if time zone name was provided in data type parameters, false if it's using default time zone.
|
||||
bool has_explicit_time_zone;
|
||||
|
||||
const DateLUTImpl & time_zone;
|
||||
const DateLUTImpl & utc_time_zone;
|
||||
};
|
||||
|
@ -365,8 +365,8 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid)
|
||||
/// 4. INSERT INTO table ...; (both Storage instances writes data without any synchronization)
|
||||
/// To avoid it, we remember UUIDs of detached tables and do not allow ATTACH of a table with such a UUID while the detached instance is still in use.
|
||||
if (detached_tables.count(uuid))
|
||||
throw Exception("Cannot attach table with UUID " + toString(uuid) +
|
||||
", because it was detached but still used by some query. Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS);
|
||||
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Cannot attach table with UUID {}, "
|
||||
"because it was detached but still used by some query. Retry later.", toString(uuid));
|
||||
}
|
||||
|
||||
void DatabaseAtomic::setDetachedTableNotInUseForce(const UUID & uuid)
|
||||
@ -573,12 +573,6 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name
|
||||
}
|
||||
void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (detached_tables.count(uuid) == 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/// Table is in use while its shared_ptr counter is greater than 1.
|
||||
/// We cannot trigger condvar on shared_ptr destruction, so it's busy wait.
|
||||
while (true)
|
||||
@ -594,5 +588,13 @@ void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-blocking counterpart of waitDetachedTableNotInUse: while holding `mutex`, runs
/// cleanupDetachedTables() and then assertDetachedTableNotInUse(uuid), which throws
/// TABLE_ALREADY_EXISTS if `uuid` is still present in detached_tables.
void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid)
|
||||
{
|
||||
/// Declared before the lock guard, so it is destroyed only after the guard has released `mutex`.
/// NOTE(review): presumably this keeps destruction of the cleaned-up tables outside the critical section - confirm.
DetachedTables not_in_use;
|
||||
std::lock_guard lock{mutex};
|
||||
not_in_use = cleanupDetachedTables();
|
||||
assertDetachedTableNotInUse(uuid);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -58,6 +58,7 @@ public:
|
||||
void tryRemoveSymlink(const String & table_name);
|
||||
|
||||
void waitDetachedTableNotInUse(const UUID & uuid) override;
|
||||
void checkDetachedTableNotInUse(const UUID & uuid) override;
|
||||
void setDetachedTableNotInUseForce(const UUID & uuid);
|
||||
|
||||
protected:
|
||||
|
@ -158,7 +158,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
|
||||
const auto & [remote_host_name, remote_port] = parseAddress(host_port, 3306);
|
||||
MySQLClient client(remote_host_name, remote_port, mysql_user_name, mysql_user_password);
|
||||
auto mysql_pool = mysqlxx::Pool(mysql_database_name, remote_host_name, mysql_user_name, mysql_user_password);
|
||||
auto mysql_pool = mysqlxx::Pool(mysql_database_name, remote_host_name, mysql_user_name, mysql_user_password, remote_port);
|
||||
|
||||
|
||||
auto materialize_mode_settings = std::make_unique<MaterializeMySQLSettings>();
|
||||
|
@ -169,11 +169,22 @@ void DatabaseWithDictionaries::createDictionary(ContextPtr local_context, const
|
||||
}
|
||||
|
||||
bool succeeded = false;
|
||||
bool uuid_locked = false;
|
||||
SCOPE_EXIT({
|
||||
if (!succeeded)
|
||||
{
|
||||
if (uuid_locked)
|
||||
DatabaseCatalog::instance().removeUUIDMappingFinally(dict_id.uuid);
|
||||
Poco::File(dictionary_metadata_tmp_path).remove();
|
||||
}
|
||||
});
|
||||
|
||||
if (dict_id.uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
DatabaseCatalog::instance().addUUIDMapping(dict_id.uuid);
|
||||
uuid_locked = true;
|
||||
}
|
||||
|
||||
/// Add a temporary repository containing the dictionary.
|
||||
/// We need this temp repository to try loading the dictionary before actually attaching it to the database.
|
||||
auto temp_repository = external_loader.addConfigRepository(std::make_unique<ExternalLoaderTempConfigRepository>(
|
||||
|
@ -345,7 +345,8 @@ public:
|
||||
|
||||
virtual void assertCanBeDetached(bool /*cleanup*/) {}
|
||||
|
||||
virtual void waitDetachedTableNotInUse(const UUID & /*uuid*/) { assert(false); }
|
||||
virtual void waitDetachedTableNotInUse(const UUID & /*uuid*/) { }
|
||||
virtual void checkDetachedTableNotInUse(const UUID & /*uuid*/) { }
|
||||
|
||||
/// Ask all tables to complete the background threads they are using and delete all table objects.
|
||||
virtual void shutdown() = 0;
|
||||
|
@ -198,7 +198,7 @@ ASTPtr DatabaseConnectionMySQL::getCreateDatabaseQuery() const
|
||||
|
||||
void DatabaseConnectionMySQL::fetchTablesIntoLocalCache(ContextPtr local_context) const
|
||||
{
|
||||
const auto & tables_with_modification_time = fetchTablesWithModificationTime();
|
||||
const auto & tables_with_modification_time = fetchTablesWithModificationTime(local_context);
|
||||
|
||||
destroyLocalCacheExtraTables(tables_with_modification_time);
|
||||
fetchLatestTablesStructureIntoCache(tables_with_modification_time, local_context);
|
||||
@ -252,7 +252,7 @@ void DatabaseConnectionMySQL::fetchLatestTablesStructureIntoCache(
|
||||
}
|
||||
}
|
||||
|
||||
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime() const
|
||||
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime(ContextPtr local_context) const
|
||||
{
|
||||
Block tables_status_sample_block
|
||||
{
|
||||
@ -268,7 +268,8 @@ std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTim
|
||||
" WHERE TABLE_SCHEMA = " << quote << database_name_in_mysql;
|
||||
|
||||
std::map<String, UInt64> tables_with_modification_time;
|
||||
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(local_context->getSettingsRef());
|
||||
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = result.read())
|
||||
{
|
||||
@ -292,7 +293,7 @@ DatabaseConnectionMySQL::fetchTablesColumnsList(const std::vector<String> & tabl
|
||||
mysql_pool,
|
||||
database_name_in_mysql,
|
||||
tables_name,
|
||||
settings.external_table_functions_use_nulls,
|
||||
settings,
|
||||
database_settings->mysql_datatypes_support_level);
|
||||
}
|
||||
|
||||
|
@ -108,7 +108,7 @@ private:
|
||||
|
||||
void fetchTablesIntoLocalCache(ContextPtr context) const;
|
||||
|
||||
std::map<String, UInt64> fetchTablesWithModificationTime() const;
|
||||
std::map<String, UInt64> fetchTablesWithModificationTime(ContextPtr local_context) const;
|
||||
|
||||
std::map<String, NamesAndTypesList> fetchTablesColumnsList(const std::vector<String> & tables_name, ContextPtr context) const;
|
||||
|
||||
|
@ -44,7 +44,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
mysqlxx::PoolWithFailover & pool,
|
||||
const String & database_name,
|
||||
const std::vector<String> & tables_name,
|
||||
bool external_table_functions_use_nulls,
|
||||
const Settings & settings,
|
||||
MultiEnum<MySQLDataTypesSupport> type_support)
|
||||
{
|
||||
std::map<String, NamesAndTypesList> tables_and_columns;
|
||||
@ -72,13 +72,18 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
" IS_NULLABLE = 'YES' AS is_nullable,"
|
||||
" COLUMN_TYPE LIKE '%unsigned' AS is_unsigned,"
|
||||
" CHARACTER_MAXIMUM_LENGTH AS length,"
|
||||
" NUMERIC_PRECISION as '',"
|
||||
" NUMERIC_PRECISION as numeric_precision,"
|
||||
" IF(ISNULL(NUMERIC_SCALE), DATETIME_PRECISION, NUMERIC_SCALE) AS scale" // we know DATETIME_PRECISION as a scale in CH
|
||||
" FROM INFORMATION_SCHEMA.COLUMNS"
|
||||
" WHERE TABLE_SCHEMA = " << quote << database_name
|
||||
<< " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION";
|
||||
" WHERE ";
|
||||
|
||||
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE);
|
||||
if (!database_name.empty())
|
||||
query << " TABLE_SCHEMA = " << quote << database_name << " AND ";
|
||||
|
||||
query << " TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION";
|
||||
|
||||
StreamSettings mysql_input_stream_settings(settings);
|
||||
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, mysql_input_stream_settings);
|
||||
while (Block block = result.read())
|
||||
{
|
||||
const auto & table_name_col = *block.getByPosition(0).column;
|
||||
@ -99,7 +104,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
convertMySQLDataType(
|
||||
type_support,
|
||||
column_type_col[i].safeGet<String>(),
|
||||
external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
|
||||
settings.external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
|
||||
is_unsigned_col[i].safeGet<UInt64>(),
|
||||
char_max_length_col[i].safeGet<UInt64>(),
|
||||
precision_col[i].safeGet<UInt64>(),
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -20,7 +21,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
mysqlxx::PoolWithFailover & pool,
|
||||
const String & database_name,
|
||||
const std::vector<String> & tables_name,
|
||||
bool external_table_functions_use_nulls,
|
||||
const Settings & settings,
|
||||
MultiEnum<MySQLDataTypesSupport> type_support);
|
||||
|
||||
}
|
||||
|
@ -24,7 +24,8 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name, const std::vector<String> & fetch_tables)
|
||||
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name,
|
||||
const std::vector<String> & fetch_tables, const Settings & global_settings)
|
||||
{
|
||||
std::unordered_map<String, String> tables_create_query;
|
||||
for (const auto & fetch_table_name : fetch_tables)
|
||||
@ -34,9 +35,10 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
{std::make_shared<DataTypeString>(), "Create Table"},
|
||||
};
|
||||
|
||||
StreamSettings mysql_input_stream_settings(global_settings, false, true);
|
||||
MySQLBlockInputStream show_create_table(
|
||||
connection, "SHOW CREATE TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(fetch_table_name),
|
||||
show_create_table_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
show_create_table_header, mysql_input_stream_settings);
|
||||
|
||||
Block create_query_block = show_create_table.read();
|
||||
if (!create_query_block || create_query_block.rows() != 1)
|
||||
@ -49,13 +51,14 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
}
|
||||
|
||||
|
||||
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database)
|
||||
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database, const Settings & global_settings)
|
||||
{
|
||||
Block header{{std::make_shared<DataTypeString>(), "table_name"}};
|
||||
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database);
|
||||
|
||||
std::vector<String> tables_in_db;
|
||||
MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(global_settings);
|
||||
MySQLBlockInputStream input(connection, query, header, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = input.read())
|
||||
{
|
||||
@ -77,7 +80,8 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c
|
||||
{std::make_shared<DataTypeString>(), "Executed_Gtid_Set"},
|
||||
};
|
||||
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, mysql_input_stream_settings);
|
||||
Block master_status = input.read();
|
||||
|
||||
if (!master_status || master_status.rows() != 1)
|
||||
@ -99,7 +103,8 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
|
||||
};
|
||||
|
||||
const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'";
|
||||
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, mysql_input_stream_settings);
|
||||
|
||||
while (Block variables_block = variables_input.read())
|
||||
{
|
||||
@ -114,7 +119,7 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, WriteBuffer & out)
|
||||
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings, WriteBuffer & out)
|
||||
{
|
||||
Block sync_user_privs_header
|
||||
{
|
||||
@ -122,7 +127,8 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
|
||||
};
|
||||
|
||||
String grants_query, sub_privs;
|
||||
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(global_settings);
|
||||
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, mysql_input_stream_settings);
|
||||
while (Block block = input.read())
|
||||
{
|
||||
for (size_t index = 0; index < block.rows(); ++index)
|
||||
@ -146,11 +152,11 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
|
||||
return false;
|
||||
}
|
||||
|
||||
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection)
|
||||
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings)
|
||||
{
|
||||
WriteBufferFromOwnString out;
|
||||
|
||||
if (!checkSyncUserPrivImpl(connection, out))
|
||||
if (!checkSyncUserPrivImpl(connection, global_settings, out))
|
||||
throw Exception("MySQL SYNC USER ACCESS ERR: mysql sync user needs "
|
||||
"at least GLOBAL PRIVILEGES:'RELOAD, REPLICATION SLAVE, REPLICATION CLIENT' "
|
||||
"and SELECT PRIVILEGE on MySQL Database."
|
||||
@ -167,7 +173,8 @@ bool MaterializeMetadata::checkBinlogFileExists(const mysqlxx::PoolWithFailover:
|
||||
{std::make_shared<DataTypeUInt64>(), "File_size"}
|
||||
};
|
||||
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = input.read())
|
||||
{
|
||||
@ -222,7 +229,7 @@ void MaterializeMetadata::transaction(const MySQLReplication::Position & positio
|
||||
commitMetadata(std::move(fun), persistent_tmp_path, persistent_path);
|
||||
}
|
||||
|
||||
MaterializeMetadata::MaterializeMetadata(const String & path_) : persistent_path(path_)
|
||||
MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & settings_) : persistent_path(path_), settings(settings_)
|
||||
{
|
||||
if (Poco::File(persistent_path).exists())
|
||||
{
|
||||
@ -244,7 +251,7 @@ void MaterializeMetadata::startReplication(
|
||||
mysqlxx::PoolWithFailover::Entry & connection, const String & database,
|
||||
bool & opened_transaction, std::unordered_map<String, String> & need_dumping_tables)
|
||||
{
|
||||
checkSyncUserPriv(connection);
|
||||
checkSyncUserPriv(connection, settings);
|
||||
|
||||
if (checkBinlogFileExists(connection))
|
||||
return;
|
||||
@ -263,7 +270,7 @@ void MaterializeMetadata::startReplication(
|
||||
connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute();
|
||||
|
||||
opened_transaction = true;
|
||||
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database));
|
||||
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), settings);
|
||||
connection->query("UNLOCK TABLES;").execute();
|
||||
}
|
||||
catch (...)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Core/MySQL/MySQLReplication.h>
|
||||
#include <mysqlxx/Connection.h>
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,6 +26,7 @@ namespace DB
|
||||
struct MaterializeMetadata
|
||||
{
|
||||
const String persistent_path;
|
||||
const Settings settings;
|
||||
|
||||
String binlog_file;
|
||||
UInt64 binlog_position;
|
||||
@ -50,7 +52,7 @@ struct MaterializeMetadata
|
||||
bool & opened_transaction,
|
||||
std::unordered_map<String, String> & need_dumping_tables);
|
||||
|
||||
MaterializeMetadata(const String & path_);
|
||||
MaterializeMetadata(const String & path_, const Settings & settings_);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ MaterializeMySQLSyncThread::~MaterializeMySQLSyncThread()
|
||||
}
|
||||
}
|
||||
|
||||
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const Settings & settings)
|
||||
{
|
||||
Block variables_header{
|
||||
{std::make_shared<DataTypeString>(), "Variable_name"},
|
||||
@ -104,19 +104,19 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
"OR (Variable_name = 'default_authentication_plugin' AND upper(Value) = 'MYSQL_NATIVE_PASSWORD') "
|
||||
"OR (Variable_name = 'log_bin_use_v1_row_events' AND upper(Value) = 'OFF');";
|
||||
|
||||
MySQLBlockInputStream variables_input(connection, check_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream variables_input(connection, check_query, variables_header, mysql_input_stream_settings);
|
||||
|
||||
Block variables_block = variables_input.read();
|
||||
if (!variables_block || variables_block.rows() != 5)
|
||||
std::unordered_map<String, String> variables_error_message{
|
||||
{"log_bin", "log_bin = 'ON'"},
|
||||
{"binlog_format", "binlog_format='ROW'"},
|
||||
{"binlog_row_image", "binlog_row_image='FULL'"},
|
||||
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
|
||||
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
|
||||
};
|
||||
|
||||
while (Block variables_block = variables_input.read())
|
||||
{
|
||||
std::unordered_map<String, String> variables_error_message{
|
||||
{"log_bin", "log_bin = 'ON'"},
|
||||
{"binlog_format", "binlog_format='ROW'"},
|
||||
{"binlog_row_image", "binlog_row_image='FULL'"},
|
||||
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
|
||||
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
|
||||
};
|
||||
|
||||
ColumnPtr variable_name_column = variables_block.getByName("Variable_name").column;
|
||||
|
||||
for (size_t index = 0; index < variables_block.rows(); ++index)
|
||||
@ -126,7 +126,10 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
if (error_message_it != variables_error_message.end())
|
||||
variables_error_message.erase(error_message_it);
|
||||
}
|
||||
}
|
||||
|
||||
if (!variables_error_message.empty())
|
||||
{
|
||||
bool first = true;
|
||||
WriteBufferFromOwnString error_message;
|
||||
error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires ";
|
||||
@ -167,7 +170,7 @@ void MaterializeMySQLSyncThread::synchronization()
|
||||
try
|
||||
{
|
||||
MaterializeMetadata metadata(
|
||||
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata");
|
||||
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata", getContext()->getSettingsRef());
|
||||
bool need_reconnect = true;
|
||||
|
||||
Stopwatch watch;
|
||||
@ -240,7 +243,7 @@ void MaterializeMySQLSyncThread::assertMySQLAvailable()
|
||||
{
|
||||
try
|
||||
{
|
||||
checkMySQLVariables(pool.get());
|
||||
checkMySQLVariables(pool.get(), getContext()->getSettingsRef());
|
||||
}
|
||||
catch (const mysqlxx::ConnectionFailed & e)
|
||||
{
|
||||
@ -326,9 +329,10 @@ static inline void dumpDataForTables(
|
||||
tryToExecuteQuery(query_prefix + " " + iterator->second, query_context, database_name, comment); /// create table.
|
||||
|
||||
auto out = std::make_shared<CountingBlockOutputStream>(getTableOutput(database_name, table_name, query_context));
|
||||
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
|
||||
MySQLBlockInputStream input(
|
||||
connection, "SELECT * FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name),
|
||||
out->getHeader(), DEFAULT_BLOCK_SIZE);
|
||||
out->getHeader(), mysql_input_stream_settings);
|
||||
|
||||
Stopwatch watch;
|
||||
copyData(input, *out, is_cancelled);
|
||||
@ -375,7 +379,7 @@ bool MaterializeMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metad
|
||||
|
||||
opened_transaction = false;
|
||||
|
||||
checkMySQLVariables(connection);
|
||||
checkMySQLVariables(connection, getContext()->getSettingsRef());
|
||||
std::unordered_map<String, String> need_dumping_tables;
|
||||
metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables);
|
||||
|
||||
|
@ -4,9 +4,15 @@
|
||||
#include "DictionarySourceFactory.h"
|
||||
#include "DictionaryStructure.h"
|
||||
#include "registerDictionaries.h"
|
||||
#include <Core/Settings.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
[[maybe_unused]]
|
||||
static const size_t default_num_tries_on_connection_loss = 3;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
@ -14,20 +20,20 @@ namespace ErrorCodes
|
||||
|
||||
void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
{
|
||||
auto create_table_source = [=](const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
Block & sample_block,
|
||||
ContextPtr /* context */,
|
||||
auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
|
||||
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
|
||||
[[maybe_unused]] const std::string & config_prefix,
|
||||
[[maybe_unused]] Block & sample_block,
|
||||
[[maybe_unused]] ContextPtr context,
|
||||
const std::string & /* default_database */,
|
||||
bool /* check_config */) -> DictionarySourcePtr {
|
||||
#if USE_MYSQL
|
||||
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block);
|
||||
StreamSettings mysql_input_stream_settings(context->getSettingsRef()
|
||||
, config.getBool(config_prefix + ".mysql.close_connection", false) || config.getBool(config_prefix + ".mysql.share_connection", false)
|
||||
, false
|
||||
, config.getBool(config_prefix + ".mysql.fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss);
|
||||
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block, mysql_input_stream_settings);
|
||||
#else
|
||||
(void)dict_struct;
|
||||
(void)config;
|
||||
(void)config_prefix;
|
||||
(void)sample_block;
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.");
|
||||
#endif
|
||||
@ -45,22 +51,21 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <common/LocalDateTime.h>
|
||||
# include <common/logger_useful.h>
|
||||
# include <Formats/MySQLBlockInputStream.h>
|
||||
# include "readInvalidateQuery.h"
|
||||
# include <mysqlxx/Exception.h>
|
||||
# include <mysqlxx/PoolFactory.h>
|
||||
# include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
static const UInt64 max_block_size = 8192;
|
||||
static const size_t default_num_tries_on_connection_loss = 3;
|
||||
|
||||
|
||||
MySQLDictionarySource::MySQLDictionarySource(
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
const Block & sample_block_)
|
||||
const Block & sample_block_,
|
||||
const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLDictionarySource"))
|
||||
, update_time{std::chrono::system_clock::from_time_t(0)}
|
||||
, dict_struct{dict_struct_}
|
||||
@ -74,10 +79,7 @@ MySQLDictionarySource::MySQLDictionarySource(
|
||||
, query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks}
|
||||
, load_all_query{query_builder.composeLoadAllQuery()}
|
||||
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
|
||||
, close_connection(
|
||||
config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false))
|
||||
, max_tries_for_mysql_block_input_stream(
|
||||
config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss)
|
||||
, settings(settings_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -98,8 +100,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other
|
||||
, last_modification{other.last_modification}
|
||||
, invalidate_query{other.invalidate_query}
|
||||
, invalidate_query_response{other.invalidate_query_response}
|
||||
, close_connection{other.close_connection}
|
||||
, max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream}
|
||||
, settings(other.settings)
|
||||
{
|
||||
}
|
||||
|
||||
@ -122,7 +123,7 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate()
|
||||
BlockInputStreamPtr MySQLDictionarySource::loadFromQuery(const String & query)
|
||||
{
|
||||
return std::make_shared<MySQLWithFailoverBlockInputStream>(
|
||||
pool, query, sample_block, max_block_size, close_connection, false, max_tries_for_mysql_block_input_stream);
|
||||
pool, query, sample_block, settings);
|
||||
}
|
||||
|
||||
BlockInputStreamPtr MySQLDictionarySource::loadAll()
|
||||
@ -245,7 +246,7 @@ LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry &
|
||||
++fetched_rows;
|
||||
}
|
||||
|
||||
if (close_connection && allow_connection_closure)
|
||||
if (settings.auto_close && allow_connection_closure)
|
||||
{
|
||||
connection.disconnect();
|
||||
}
|
||||
@ -269,7 +270,7 @@ std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request
|
||||
Block invalidate_sample_block;
|
||||
ColumnPtr column(ColumnString::create());
|
||||
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
|
||||
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, 1, close_connection);
|
||||
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, settings);
|
||||
return readInvalidateQuery(block_input_stream);
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
# include "DictionaryStructure.h"
|
||||
# include "ExternalQueryBuilder.h"
|
||||
# include "IDictionarySource.h"
|
||||
|
||||
# include <Formats/MySQLBlockInputStream.h>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -35,7 +35,8 @@ public:
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
const Block & sample_block_);
|
||||
const Block & sample_block_,
|
||||
const StreamSettings & settings_);
|
||||
|
||||
/// copy-constructor is provided in order to support cloneability
|
||||
MySQLDictionarySource(const MySQLDictionarySource & other);
|
||||
@ -87,8 +88,7 @@ private:
|
||||
LocalDateTime last_modification;
|
||||
std::string invalidate_query;
|
||||
mutable std::string invalidate_query_response;
|
||||
const bool close_connection;
|
||||
const size_t max_tries_for_mysql_block_input_stream;
|
||||
const StreamSettings settings;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -250,8 +250,12 @@ class ReadIndirectBufferFromS3 final : public ReadBufferFromFileBase
|
||||
{
|
||||
public:
|
||||
ReadIndirectBufferFromS3(
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, DiskS3::Metadata metadata_, size_t buf_size_)
|
||||
: client_ptr(std::move(client_ptr_)), bucket(bucket_), metadata(std::move(metadata_)), buf_size(buf_size_)
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, DiskS3::Metadata metadata_, UInt64 s3_max_single_read_retries_, size_t buf_size_)
|
||||
: client_ptr(std::move(client_ptr_))
|
||||
, bucket(bucket_)
|
||||
, metadata(std::move(metadata_))
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, buf_size(buf_size_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -307,7 +311,7 @@ private:
|
||||
const auto & [path, size] = metadata.s3_objects[i];
|
||||
if (size > offset)
|
||||
{
|
||||
auto buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, buf_size);
|
||||
auto buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, s3_max_single_read_retries, buf_size);
|
||||
buf->seek(offset, SEEK_SET);
|
||||
return buf;
|
||||
}
|
||||
@ -336,7 +340,7 @@ private:
|
||||
|
||||
++current_buf_idx;
|
||||
const auto & path = metadata.s3_objects[current_buf_idx].first;
|
||||
current_buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, buf_size);
|
||||
current_buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, s3_max_single_read_retries, buf_size);
|
||||
current_buf->next();
|
||||
working_buffer = current_buf->buffer();
|
||||
absolute_position += working_buffer.size();
|
||||
@ -347,6 +351,7 @@ private:
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||
const String & bucket;
|
||||
DiskS3::Metadata metadata;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t buf_size;
|
||||
|
||||
size_t absolute_position = 0;
|
||||
@ -560,6 +565,7 @@ DiskS3::DiskS3(
|
||||
String bucket_,
|
||||
String s3_root_path_,
|
||||
String metadata_path_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
size_t min_upload_part_size_,
|
||||
size_t max_single_part_upload_size_,
|
||||
size_t min_bytes_for_seek_,
|
||||
@ -573,6 +579,7 @@ DiskS3::DiskS3(
|
||||
, bucket(std::move(bucket_))
|
||||
, s3_root_path(std::move(s3_root_path_))
|
||||
, metadata_path(std::move(metadata_path_))
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, min_upload_part_size(min_upload_part_size_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, min_bytes_for_seek(min_bytes_for_seek_)
|
||||
@ -679,7 +686,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, si
|
||||
LOG_DEBUG(log, "Read from file by path: {}. Existing S3 objects: {}",
|
||||
backQuote(metadata_path + path), metadata.s3_objects.size());
|
||||
|
||||
auto reader = std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, buf_size);
|
||||
auto reader = std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, s3_max_single_read_retries, buf_size);
|
||||
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), min_bytes_for_seek);
|
||||
}
|
||||
|
||||
@ -979,7 +986,7 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
|
||||
if (!checkObjectExists(source_bucket, source_path + SCHEMA_VERSION_OBJECT))
|
||||
return version;
|
||||
|
||||
ReadBufferFromS3 buffer (client, source_bucket, source_path + SCHEMA_VERSION_OBJECT);
|
||||
ReadBufferFromS3 buffer(client, source_bucket, source_path + SCHEMA_VERSION_OBJECT, s3_max_single_read_retries);
|
||||
readIntText(version, buffer);
|
||||
|
||||
return version;
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
String bucket_,
|
||||
String s3_root_path_,
|
||||
String metadata_path_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
size_t min_upload_part_size_,
|
||||
size_t max_single_part_upload_size_,
|
||||
size_t min_bytes_for_seek_,
|
||||
@ -185,6 +186,7 @@ private:
|
||||
const String bucket;
|
||||
const String s3_root_path;
|
||||
String metadata_path;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t min_upload_part_size;
|
||||
size_t max_single_part_upload_size;
|
||||
size_t min_bytes_for_seek;
|
||||
|
@ -161,6 +161,7 @@ void registerDiskS3(DiskFactory & factory)
|
||||
uri.bucket,
|
||||
uri.key,
|
||||
metadata_path,
|
||||
context->getSettingsRef().s3_max_single_read_retries,
|
||||
context->getSettingsRef().s3_min_upload_part_size,
|
||||
context->getSettingsRef().s3_max_single_part_upload_size,
|
||||
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "gtest_disk.h"
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#endif
|
||||
|
@ -30,6 +30,15 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Builds the read options for a MySQL block input stream.
///
/// The row limit per block is `external_storage_max_read_rows` when that setting is non-zero,
/// and the common `max_block_size` setting otherwise. The byte limit is copied from
/// `external_storage_max_read_bytes` unchanged. The remaining arguments are stored as given:
/// `max_retry_` becomes `default_num_tries_on_connection_loss`.
StreamSettings::StreamSettings(const Settings & settings, bool auto_close_, bool fetch_by_name_, size_t max_retry_)
    : max_read_mysql_row_nums(
        settings.external_storage_max_read_rows ? settings.external_storage_max_read_rows : settings.max_block_size)
    , max_read_mysql_bytes_size(settings.external_storage_max_read_bytes)
    , auto_close(auto_close_)
    , fetch_by_name(fetch_by_name_)
    , default_num_tries_on_connection_loss(max_retry_)
{
}
|
||||
|
||||
MySQLBlockInputStream::Connection::Connection(
|
||||
const mysqlxx::PoolWithFailover::Entry & entry_,
|
||||
const std::string & query_str)
|
||||
@ -44,29 +53,19 @@ MySQLBlockInputStream::MySQLBlockInputStream(
|
||||
const mysqlxx::PoolWithFailover::Entry & entry,
|
||||
const std::string & query_str,
|
||||
const Block & sample_block,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_,
|
||||
const bool fetch_by_name_)
|
||||
const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLBlockInputStream"))
|
||||
, connection{std::make_unique<Connection>(entry, query_str)}
|
||||
, max_block_size{max_block_size_}
|
||||
, auto_close{auto_close_}
|
||||
, fetch_by_name(fetch_by_name_)
|
||||
, settings{std::make_unique<StreamSettings>(settings_)}
|
||||
{
|
||||
description.init(sample_block);
|
||||
initPositionMappingFromQueryResultStructure();
|
||||
}
|
||||
|
||||
/// For descendant MySQLWithFailoverBlockInputStream
|
||||
MySQLBlockInputStream::MySQLBlockInputStream(
|
||||
const Block & sample_block_,
|
||||
UInt64 max_block_size_,
|
||||
bool auto_close_,
|
||||
bool fetch_by_name_)
|
||||
MySQLBlockInputStream::MySQLBlockInputStream(const Block &sample_block_, const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLBlockInputStream"))
|
||||
, max_block_size(max_block_size_)
|
||||
, auto_close(auto_close_)
|
||||
, fetch_by_name(fetch_by_name_)
|
||||
, settings(std::make_unique<StreamSettings>(settings_))
|
||||
{
|
||||
description.init(sample_block_);
|
||||
}
|
||||
@ -76,14 +75,10 @@ MySQLWithFailoverBlockInputStream::MySQLWithFailoverBlockInputStream(
|
||||
mysqlxx::PoolWithFailoverPtr pool_,
|
||||
const std::string & query_str_,
|
||||
const Block & sample_block_,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_,
|
||||
const bool fetch_by_name_,
|
||||
const size_t max_tries_)
|
||||
: MySQLBlockInputStream(sample_block_, max_block_size_, auto_close_, fetch_by_name_)
|
||||
, pool(pool_)
|
||||
, query_str(query_str_)
|
||||
, max_tries(max_tries_)
|
||||
const StreamSettings & settings_)
|
||||
: MySQLBlockInputStream(sample_block_, settings_)
|
||||
, pool(pool_)
|
||||
, query_str(query_str_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -101,12 +96,12 @@ void MySQLWithFailoverBlockInputStream::readPrefix()
|
||||
}
|
||||
catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST
|
||||
{
|
||||
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, max_tries, ecl.displayText());
|
||||
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, settings->default_num_tries_on_connection_loss, ecl.displayText());
|
||||
}
|
||||
|
||||
if (++count_connect_attempts > max_tries)
|
||||
if (++count_connect_attempts > settings->default_num_tries_on_connection_loss)
|
||||
{
|
||||
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, max_tries);
|
||||
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -118,45 +113,57 @@ namespace
|
||||
{
|
||||
using ValueType = ExternalResultDescription::ValueType;
|
||||
|
||||
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value)
|
||||
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value, size_t & read_bytes_size)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ValueType::vtUInt8:
|
||||
assert_cast<ColumnUInt8 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 1;
|
||||
break;
|
||||
case ValueType::vtUInt16:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtUInt32:
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtUInt64:
|
||||
assert_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtInt8:
|
||||
assert_cast<ColumnInt8 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 1;
|
||||
break;
|
||||
case ValueType::vtInt16:
|
||||
assert_cast<ColumnInt16 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtInt32:
|
||||
assert_cast<ColumnInt32 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtInt64:
|
||||
assert_cast<ColumnInt64 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtFloat32:
|
||||
assert_cast<ColumnFloat32 &>(column).insertValue(value.getDouble());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtFloat64:
|
||||
assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtString:
|
||||
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
|
||||
read_bytes_size += assert_cast<ColumnString &>(column).byteSize();
|
||||
break;
|
||||
case ValueType::vtDate:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum()));
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtDateTime:
|
||||
{
|
||||
@ -166,10 +173,12 @@ namespace
|
||||
if (time < 0)
|
||||
time = 0;
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(time);
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
}
|
||||
case ValueType::vtUUID:
|
||||
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
|
||||
read_bytes_size += assert_cast<ColumnUInt128 &>(column).byteSize();
|
||||
break;
|
||||
case ValueType::vtDateTime64:[[fallthrough]];
|
||||
case ValueType::vtDecimal32: [[fallthrough]];
|
||||
@ -179,10 +188,12 @@ namespace
|
||||
{
|
||||
ReadBuffer buffer(const_cast<char *>(value.data()), value.size(), 0);
|
||||
data_type.getDefaultSerialization()->deserializeWholeText(column, buffer, FormatSettings{});
|
||||
read_bytes_size += column.sizeOfValueIfFixed();
|
||||
break;
|
||||
}
|
||||
case ValueType::vtFixedString:
|
||||
assert_cast<ColumnFixedString &>(column).insertData(value.data(), value.size());
|
||||
read_bytes_size += column.sizeOfValueIfFixed();
|
||||
break;
|
||||
default:
|
||||
throw Exception("Unsupported value type", ErrorCodes::NOT_IMPLEMENTED);
|
||||
@ -198,7 +209,7 @@ Block MySQLBlockInputStream::readImpl()
|
||||
auto row = connection->result.fetch();
|
||||
if (!row)
|
||||
{
|
||||
if (auto_close)
|
||||
if (settings->auto_close)
|
||||
connection->entry.disconnect();
|
||||
|
||||
return {};
|
||||
@ -209,6 +220,8 @@ Block MySQLBlockInputStream::readImpl()
|
||||
columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty();
|
||||
|
||||
size_t num_rows = 0;
|
||||
size_t read_bytes_size = 0;
|
||||
|
||||
while (row)
|
||||
{
|
||||
for (size_t index = 0; index < position_mapping.size(); ++index)
|
||||
@ -224,12 +237,12 @@ Block MySQLBlockInputStream::readImpl()
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value, read_bytes_size);
|
||||
column_nullable.getNullMapData().emplace_back(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value);
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value, read_bytes_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -245,7 +258,7 @@ Block MySQLBlockInputStream::readImpl()
|
||||
}
|
||||
|
||||
++num_rows;
|
||||
if (num_rows == max_block_size)
|
||||
if (num_rows == settings->max_read_mysql_row_nums || (settings->max_read_mysql_bytes_size && read_bytes_size >= settings->max_read_mysql_bytes_size))
|
||||
break;
|
||||
|
||||
row = connection->result.fetch();
|
||||
@ -257,7 +270,7 @@ void MySQLBlockInputStream::initPositionMappingFromQueryResultStructure()
|
||||
{
|
||||
position_mapping.resize(description.sample_block.columns());
|
||||
|
||||
if (!fetch_by_name)
|
||||
if (!settings->fetch_by_name)
|
||||
{
|
||||
if (description.sample_block.columns() != connection->result.getNumFields())
|
||||
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "
|
||||
|
@ -6,11 +6,24 @@
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
#include <mysqlxx/Query.h>
|
||||
#include <Core/ExternalResultDescription.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct StreamSettings
|
||||
{
|
||||
/// Check if setting is enabled, otherwise use common `max_block_size` setting.
|
||||
size_t max_read_mysql_row_nums;
|
||||
size_t max_read_mysql_bytes_size;
|
||||
bool auto_close;
|
||||
bool fetch_by_name;
|
||||
size_t default_num_tries_on_connection_loss;
|
||||
|
||||
StreamSettings(const Settings & settings, bool auto_close_ = false, bool fetch_by_name_ = false, size_t max_retry_ = 5);
|
||||
|
||||
};
|
||||
|
||||
/// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining
|
||||
class MySQLBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
@ -19,16 +32,14 @@ public:
|
||||
const mysqlxx::PoolWithFailover::Entry & entry,
|
||||
const std::string & query_str,
|
||||
const Block & sample_block,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_ = false,
|
||||
const bool fetch_by_name_ = false);
|
||||
const StreamSettings & settings_);
|
||||
|
||||
String getName() const override { return "MySQL"; }
|
||||
|
||||
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
|
||||
|
||||
protected:
|
||||
MySQLBlockInputStream(const Block & sample_block_, UInt64 max_block_size_, bool auto_close_, bool fetch_by_name_);
|
||||
MySQLBlockInputStream(const Block & sample_block_, const StreamSettings & settings);
|
||||
Block readImpl() override;
|
||||
void initPositionMappingFromQueryResultStructure();
|
||||
|
||||
@ -44,9 +55,7 @@ protected:
|
||||
Poco::Logger * log;
|
||||
std::unique_ptr<Connection> connection;
|
||||
|
||||
const UInt64 max_block_size;
|
||||
const bool auto_close;
|
||||
const bool fetch_by_name;
|
||||
const std::unique_ptr<StreamSettings> settings;
|
||||
std::vector<size_t> position_mapping;
|
||||
ExternalResultDescription description;
|
||||
};
|
||||
@ -57,23 +66,18 @@ protected:
|
||||
class MySQLWithFailoverBlockInputStream final : public MySQLBlockInputStream
|
||||
{
|
||||
public:
|
||||
static constexpr inline auto MAX_TRIES_MYSQL_CONNECT = 5;
|
||||
|
||||
MySQLWithFailoverBlockInputStream(
|
||||
mysqlxx::PoolWithFailoverPtr pool_,
|
||||
const std::string & query_str_,
|
||||
const Block & sample_block_,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_ = false,
|
||||
const bool fetch_by_name_ = false,
|
||||
const size_t max_tries_ = MAX_TRIES_MYSQL_CONNECT);
|
||||
const StreamSettings & settings_);
|
||||
|
||||
private:
|
||||
void readPrefix() override;
|
||||
|
||||
mysqlxx::PoolWithFailoverPtr pool;
|
||||
std::string query_str;
|
||||
size_t max_tries;
|
||||
};
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user