Merge remote-tracking branch 'origin' into integration-6

This commit is contained in:
Yatsishin Ilya 2021-07-16 16:28:02 +03:00
commit d11ec777c9
365 changed files with 8411 additions and 1321 deletions

6
.gitmodules vendored
View File

@ -228,3 +228,9 @@
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/ClickHouse-Extras/libpqxx.git
[submodule "contrib/sqlite-amalgamation"]
path = contrib/sqlite-amalgamation
url = https://github.com/azadkuh/sqlite-amalgamation
[submodule "contrib/s2geometry"]
path = contrib/s2geometry
url = https://github.com/ClickHouse-Extras/s2geometry.git

View File

@ -536,10 +536,12 @@ include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/nanodbc.cmake)
include (cmake/find/sqlite.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
include (cmake/find/s2geometry.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")

View File

@ -0,0 +1,24 @@
option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES})
if (ENABLE_S2_GEOMETRY)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry")
message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init --recursive")
set (ENABLE_S2_GEOMETRY 0)
set (USE_S2_GEOMETRY 0)
else()
if (OPENSSL_FOUND)
set (S2_GEOMETRY_LIBRARY s2)
set (S2_GEOMETRY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src/s2)
set (USE_S2_GEOMETRY 1)
else()
message (WARNING "S2 uses OpenSSL, but the latter is absent.")
endif()
endif()
if (NOT USE_S2_GEOMETRY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable S2 geometry library")
endif()
endif()
message (STATUS "Using s2geometry=${USE_S2_GEOMETRY} : ${S2_GEOMETRY_INCLUDE_DIR}")

16
cmake/find/sqlite.cmake Normal file
View File

@ -0,0 +1,16 @@
option(ENABLE_SQLITE "Enable sqlite" ${ENABLE_LIBRARIES})
if (NOT ENABLE_SQLITE)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation/sqlite3.c")
message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sqlite library")
set (USE_SQLITE 0)
return()
endif()
set (USE_SQLITE 1)
set (SQLITE_LIBRARY sqlite)
message (STATUS "Using sqlite=${USE_SQLITE}")

View File

@ -1,4 +1,4 @@
option(ENABLE_STATS "Enalbe StatsLib library" ${ENABLE_LIBRARIES})
option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES})
if (ENABLE_STATS)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats")

View File

@ -1,3 +1,4 @@
# Third-party libraries may have substandard code.
# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will
@ -10,10 +11,8 @@ else ()
endif ()
unset (_current_dir_name)
# Third-party libraries may have substandard code.
# Also remove a possible source of nondeterminism.
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
if (WITH_COVERAGE)
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
@ -329,3 +328,10 @@ endif()
add_subdirectory(fast_float)
if (USE_SQLITE)
add_subdirectory(sqlite-cmake)
endif()
if (USE_S2_GEOMETRY)
add_subdirectory(s2geometry-cmake)
endif()

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 5994506908028612869fee627d68d8212dfe7c1e
Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 07c77549a20b63ff6981b400085eba36bb5c80c4
Subproject commit 6ff0adefdc84dac44e78804f7ca4122fe992cf8d

1
contrib/s2geometry vendored Submodule

@ -0,0 +1 @@
Subproject commit 20ea540d81f4575a3fc0aea585aac611bcd03ede

View File

@ -0,0 +1,126 @@
set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")
set(S2_SRCS
"${S2_SOURCE_DIR}/s2/base/stringprintf.cc"
"${S2_SOURCE_DIR}/s2/base/strtoint.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc"
"${S2_SOURCE_DIR}/s2/encoded_string_vector.cc"
"${S2_SOURCE_DIR}/s2/id_set_lexicon.cc"
"${S2_SOURCE_DIR}/s2/mutable_s2shape_index.cc"
"${S2_SOURCE_DIR}/s2/r2rect.cc"
"${S2_SOURCE_DIR}/s2/s1angle.cc"
"${S2_SOURCE_DIR}/s2/s1chord_angle.cc"
"${S2_SOURCE_DIR}/s2/s1interval.cc"
"${S2_SOURCE_DIR}/s2/s2boolean_operation.cc"
"${S2_SOURCE_DIR}/s2/s2builder.cc"
"${S2_SOURCE_DIR}/s2/s2builder_graph.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_vector_layer.cc"
"${S2_SOURCE_DIR}/s2/s2builderutil_snap_functions.cc"
"${S2_SOURCE_DIR}/s2/s2cap.cc"
"${S2_SOURCE_DIR}/s2/s2cell.cc"
"${S2_SOURCE_DIR}/s2/s2cell_id.cc"
"${S2_SOURCE_DIR}/s2/s2cell_index.cc"
"${S2_SOURCE_DIR}/s2/s2cell_union.cc"
"${S2_SOURCE_DIR}/s2/s2centroids.cc"
"${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc"
"${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc"
"${S2_SOURCE_DIR}/s2/s2closest_point_query.cc"
"${S2_SOURCE_DIR}/s2/s2contains_vertex_query.cc"
"${S2_SOURCE_DIR}/s2/s2convex_hull_query.cc"
"${S2_SOURCE_DIR}/s2/s2coords.cc"
"${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc"
"${S2_SOURCE_DIR}/s2/s2debug.cc"
"${S2_SOURCE_DIR}/s2/s2earth.cc"
"${S2_SOURCE_DIR}/s2/s2edge_clipping.cc"
"${S2_SOURCE_DIR}/s2/s2edge_crosser.cc"
"${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
"${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
"${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
"${S2_SOURCE_DIR}/s2/s2error.cc"
"${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
"${S2_SOURCE_DIR}/s2/s2latlng.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
"${S2_SOURCE_DIR}/s2/s2lax_loop_shape.cc"
"${S2_SOURCE_DIR}/s2/s2lax_polygon_shape.cc"
"${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc"
"${S2_SOURCE_DIR}/s2/s2loop.cc"
"${S2_SOURCE_DIR}/s2/s2loop_measures.cc"
"${S2_SOURCE_DIR}/s2/s2measures.cc"
"${S2_SOURCE_DIR}/s2/s2metrics.cc"
"${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc"
"${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc"
"${S2_SOURCE_DIR}/s2/s2padded_cell.cc"
"${S2_SOURCE_DIR}/s2/s2point_compression.cc"
"${S2_SOURCE_DIR}/s2/s2point_region.cc"
"${S2_SOURCE_DIR}/s2/s2pointutil.cc"
"${S2_SOURCE_DIR}/s2/s2polygon.cc"
"${S2_SOURCE_DIR}/s2/s2polyline.cc"
"${S2_SOURCE_DIR}/s2/s2polyline_alignment.cc"
"${S2_SOURCE_DIR}/s2/s2polyline_measures.cc"
"${S2_SOURCE_DIR}/s2/s2polyline_simplifier.cc"
"${S2_SOURCE_DIR}/s2/s2predicates.cc"
"${S2_SOURCE_DIR}/s2/s2projections.cc"
"${S2_SOURCE_DIR}/s2/s2r2rect.cc"
"${S2_SOURCE_DIR}/s2/s2region.cc"
"${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc"
"${S2_SOURCE_DIR}/s2/s2region_coverer.cc"
"${S2_SOURCE_DIR}/s2/s2region_intersection.cc"
"${S2_SOURCE_DIR}/s2/s2region_union.cc"
"${S2_SOURCE_DIR}/s2/s2shape_index.cc"
"${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
"${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc"
"${S2_SOURCE_DIR}/s2/s2shape_measures.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
"${S2_SOURCE_DIR}/s2/s2text_format.cc"
"${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
"${S2_SOURCE_DIR}/s2/strings/ostringstream.cc"
"${S2_SOURCE_DIR}/s2/strings/serialize.cc"
# ClickHouse doesn't use strings from abseil.
# So, there is no duplicate symbols.
"${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc"
"${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
"${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
"${S2_SOURCE_DIR}/s2/util/coding/varint.cc"
"${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
"${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
"${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
)
add_library(s2 ${S2_SRCS})
if (OPENSSL_FOUND)
target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES})
endif()
target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
if(M_LIBRARY)
target_link_libraries(s2 PRIVATE ${M_LIBRARY})
endif()

1
contrib/sqlite-amalgamation vendored Submodule

@ -0,0 +1 @@
Subproject commit 9818baa5d027ffb26d57f810dc4c597d4946781c

View File

@ -0,0 +1,6 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation")
set(SRCS ${LIBRARY_DIR}/sqlite3.c)
add_library(sqlite ${SRCS})
target_include_directories(sqlite SYSTEM PUBLIC "${LIBRARY_DIR}")

View File

@ -378,6 +378,16 @@ function run_tests
# needs pv
01923_network_receive_time_metric_insert
01889_sqlite_read_write
# needs s2
01849_geoToS2
01851_s2_to_geo
01852_s2_get_neighbours
01853_s2_cells_intersect
01854_s2_cap_contains
01854_s2_cap_union
)
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \

View File

@ -32,7 +32,7 @@ RUN rm -rf \
RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
# Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper.
# ZooKeeper is not started by default, but consumes some space in containers.
@ -49,4 +49,3 @@ RUN mkdir /zookeeper && chmod -R 777 /zookeeper
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -29,7 +29,8 @@ RUN apt-get update -y \
unixodbc \
wget \
mysql-client=5.7* \
postgresql-client
postgresql-client \
sqlite3
RUN pip3 install numpy scipy pandas

View File

@ -118,7 +118,7 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
# Print Fatal log messages to stdout
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors
@ -131,22 +131,22 @@ zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" > /dev
rm -f /test_output/tmp
# OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

View File

@ -105,11 +105,11 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te
5) ensure everything is correct, if the test output is incorrect (due to some bug for example), adjust the reference file using text editor.
#### How to create good test
#### How to create a good test
- test should be
- A test should be
- minimal - create only tables related to tested functionality, remove unrelated columns and parts of query
- fast - should not take longer than few seconds (better subseconds)
- fast - should not take longer than a few seconds (better subseconds)
- correct - fails then feature is not working
- deterministic
- isolated / stateless
@ -126,6 +126,16 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te
- use other SQL files in the `0_stateless` folder as an example
- ensure the feature / feature combination you want to test is not yet covered with existing tests
#### Test naming rules
It's important to name tests correctly, so one could turn some tests subset off in clickhouse-test invocation.
| Tester flag| What should be in test name | When flag should be added |
|---|---|---|---|
| `--[no-]zookeeper`| "zookeeper" or "replica" | Test uses tables from ReplicatedMergeTree family |
| `--[no-]shard` | "shard" or "distributed" or "global"| Test using connections to 127.0.0.2 or similar |
| `--[no-]long` | "long" or "deadlock" or "race" | Test runs longer than 60 seconds |
#### Commit / push / create PR.
1) commit & push your changes

View File

@ -134,10 +134,10 @@ $ ./release
## Faster builds for development
Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. Two common ways to improve linking time are to use `lld` linker, and use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable these tweaks, pass the following flags to `cmake`:
Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. One common way to improve build time is to use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable this tweak, pass the following flags to `cmake`:
```
-DCMAKE_C_FLAGS="--ld-path=lld" -DCMAKE_CXX_FLAGS="--ld-path=lld" -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
```
## You Dont Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}

View File

@ -79,6 +79,7 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li
| re2 | BSD 3-clause | /contrib/re2/LICENSE |
| replxx | BSD 3-clause | /contrib/replxx/LICENSE.md |
| rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb |
| s2geometry | Apache | /contrib/s2geometry/LICENSE |
| sentry-native | MIT | /contrib/sentry-native/LICENSE |
| simdjson | Apache | /contrib/simdjson/LICENSE |
| snappy | Public Domain | /contrib/snappy/COPYING |

View File

@ -47,7 +47,7 @@ EXCHANGE TABLES new_table AND old_table;
### ReplicatedMergeTree in Atomic Database {#replicatedmergetree-in-atomic-database}
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended to not specify engine parameters - path in ZooKeeper and replica name. In this case, configuration parameters will be used [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). If you want to specify engine parameters explicitly, it is recommended to use {uuid} macros. This is useful so that unique paths are automatically generated for each table in ZooKeeper.
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended to not specify engine parameters - path in ZooKeeper and replica name. In this case, configuration parameters will be used [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). If you want to specify engine parameters explicitly, it is recommended to use `{uuid}` macros. This is useful so that unique paths are automatically generated for each table in ZooKeeper.
## See Also

View File

@ -22,4 +22,4 @@ You can also use the following database engines:
- [PostgreSQL](../../engines/database-engines/postgresql.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->
- [Replicated](../../engines/database-engines/replicated.md)

View File

@ -0,0 +1,115 @@
# [experimental] Replicated {#replicated}
The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database.
One ClickHouse server can have multiple replicated databases running and updating at the same time. But there can't be multiple replicas of the same replicated database.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
```
**Engine Parameters**
- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database.
- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard.
!!! note "Warning"
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database.
## Specifics and Recommendations {#specifics-and-recommendations}
DDL queries with `Replicated` database work in a similar way to [ON CLUSTER](../../sql-reference/distributed-ddl.md) queries, but with minor differences.
First, the DDL request tries to execute on the initiator (the host that originally received the request from the user). If the request is not fulfilled, then the user immediately receives an error, other hosts do not try to fulfill it. If the request has been successfully completed on the initiator, then all other hosts will automatically retry until they complete it. The initiator will try to wait for the query to be completed on other hosts (no longer than [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) and will return a table with the query execution statuses on each host.
The behavior in case of errors is regulated by the [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode) setting, for a `Replicated` database it is better to set it to `null_status_on_timeout` — i.e. if some hosts did not have time to execute the request for [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), then do not throw an exception, but show the `NULL` status for them in the table.
The [system.clusters](../../operations/system-tables/clusters.md) system table contains a cluster named like the replicated database, which consists of all replicas of the database. This cluster is updated automatically when creating/deleting replicas, and it can be used for [Distributed](../../engines/table-engines/special/distributed.md#distributed) tables.
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log — it checks its local metadata with the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
## Usage Example {#usage-example}
Creating a cluster with three hosts:
``` sql
node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1');
node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica');
node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}');
```
Running the DDL-query:
``` sql
CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
```
Showing the system table:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
Creating a distributed table and inserting the data:
``` sql
node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2);
node3 :) INSERT INTO r SELECT * FROM numbers(10);
node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
Adding replica on the one more host:
``` sql
node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
```
The cluster configuration will look like this:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
The distributed table also will get data from the new host:
```sql
node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -1,6 +1,6 @@
---
toc_priority: 12
toc_title: MateriaziePostgreSQL
toc_title: MaterializedPostgreSQL
---
# MaterializedPostgreSQL {#materialize-postgresql}

View File

@ -76,7 +76,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `SAMPLE BY` — An expression for sampling. Optional.
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
If a sampling expression is used, the primary key must contain it. The result of sampling expression must be unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.

View File

@ -1130,17 +1130,18 @@ The table below shows supported data types and how they match ClickHouse [data t
| `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` |
| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` |
| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` |
| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` |
| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* |
| `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` |
| `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` |
| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` |
| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` |
| `int (date)` \* | [Date](../sql-reference/data-types/date.md) | `int (date)` \* |
| `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** |
| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#settings-output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
Unsupported Avro data types: `record` (non-root), `map`
@ -1246,12 +1247,14 @@ The table below shows supported data types and how they match ClickHouse [data t
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1299,13 +1302,17 @@ The table below shows supported data types and how they match ClickHouse [data t
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1358,8 +1365,10 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

View File

@ -157,5 +157,6 @@ toc_title: Adopters
| <a href="https://signoz.io/" class="favicon">SigNoz</a> | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
| <a href="https://chelpipegroup.com/" class="favicon">ChelPipe Group</a> | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) |
| <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
| <a href="https://beeline.ru/" class="favicon">Beeline</a> | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -10,7 +10,7 @@ ClickHouse server use [ZooKeeper](https://zookeeper.apache.org/) coordination sy
!!! warning "Warning"
This feature currently in pre-production stage. We test it in our CI and on small internal installations.
## Implemetation details
## Implementation details
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, has quite a simple and powerful data model. ZooKeeper's coordination algorithm called ZAB (ZooKeeper Atomic Broadcast) doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper `clickhouse-keeper` written in C++ and use [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows to have linearizability for reads and writes, has several open-source implementations in different languages.

View File

@ -278,4 +278,16 @@ Possible values:
Default value: `0`.
## check_sample_column_is_correct {#check_sample_column_is_correct}
Enables to check column for sampling or sampling expression is correct at table creation.
Possible values:
- true — Check column or sampling expression is correct at table creation.
- false — Do not check column or sampling expression is correct at table creation.
Default value: `true`.
By default, the ClickHouse server check column for sampling or sampling expression at table creation. If you already had tables with incorrect sampling expression, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting.
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->

View File

@ -1213,7 +1213,15 @@ Default value: `3`.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes.
Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format.
Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations.
Possible values:
- 0 — Integers are output without quotes.
- 1 — Integers are enclosed in quotes.
Default value: 1.
## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals}
@ -1961,6 +1969,13 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB)
Default value: 32768 (32 KiB)
## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern}
Regexp of column names of type String to output as Avro `string` (default is `bytes`).
RE2 syntax is supported.
Type: string
## format_avro_schema_registry_url {#format_avro_schema_registry_url}
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format.
@ -1990,6 +2005,16 @@ Possible values:
Default value: 16.
## merge_selecting_sleep_ms {#merge_selecting_sleep_ms}
Sleep time for merge selecting when no part selected, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters
Possible values:
- Any positive integer.
Default value: 5000
## parallel_distributed_insert_select {#parallel_distributed_insert_select}
Enables parallel distributed `INSERT ... SELECT` query.
@ -3123,6 +3148,53 @@ SELECT
FROM fuse_tbl
```
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `300`.
## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
Sets timeout for DDL query responses from all hosts in cluster. If a DDL request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite.
Possible values:
- Positive integer.
- 0 — Async mode.
- Negative integer — infinite timeout.
Default value: `180`.
## distributed_ddl_output_mode {#distributed_ddl_output_mode}
Sets format of distributed DDL query result.
Possible values:
- `throw` — Returns result set with query execution status for all hosts where query is finished. If query has failed on some hosts, then it will rethrow the first exception. If query is not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) exceeded, then it throws `TIMEOUT_EXCEEDED` exception.
- `none` — Is similar to throw, but distributed DDL query returns no result set.
- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
Default value: `throw`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
@ -3202,3 +3274,14 @@ Default value: `1`.
**Usage**
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary}
Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md#data-format-arrow) format for `SELECT` queries.
Possible values:
- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type.
- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type.
Default value: `0`.

View File

@ -8,12 +8,11 @@ Columns:
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Index name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Index type.
- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index.
- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block.
- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation.
- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block.
**Example**
```sql
SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical;
```

View File

@ -34,7 +34,7 @@ Input table:
Query:
``` sql
SELECT medianDeterministic(val, 1) FROM t
SELECT medianDeterministic(val, 1) FROM t;
```
Result:

View File

@ -47,6 +47,7 @@ Settings:
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary)
Functions:
@ -57,5 +58,3 @@ Functions:
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
[Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/) <!--hide-->

View File

@ -12,9 +12,6 @@ toc_title: Map(key, value)
- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
!!! warning "Warning"
Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.
**Examples**

View File

@ -195,6 +195,41 @@ Result:
└────────────────────┘
```
## h3ToGeo {#h3togeo}
Returns `(lon, lat)` that corresponds to the provided H3 index.
**Syntax**
``` sql
h3ToGeo(h3Index)
```
**Arguments**
- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3ToGeo(644325524701193974) coordinates;
```
Result:
``` text
┌─coordinates───────────────────────────┐
│ (37.79506616830252,55.71290243145668) │
└───────────────────────────────────────┘
```
## h3kRing {#h3kring}
Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.

View File

@ -306,3 +306,49 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## toJSONString {#tojsonstring}
Serializes a value to its JSON representation. Various data types and nested structures are supported.
64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior.
Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them.
When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name.
**Syntax**
``` sql
toJSONString(value)
```
**Arguments**
- `value` — Value to serialize. Value may be of any data type.
**Returned value**
- JSON representation of the value.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
The first example shows serialization of a [Map](../../sql-reference/data-types/map.md).
The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md).
Query:
``` sql
SELECT toJSONString(map('key1', 1, 'key2', 2));
SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1;
```
Result:
``` text
{"key1":1,"key2":2}
[1.25,null,"nan","inf","-inf",[]]
```
**See Also**
- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)

View File

@ -465,27 +465,29 @@ Result:
## CAST(x, T) {#type_conversion_function-cast}
Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, type conversion is performed in a natural way.
The syntax `CAST(x AS t)` is also supported.
!!! note "Note"
If value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
Several syntax variants are supported.
**Syntax**
``` sql
CAST(x, T)
CAST(x AS t)
x::t
```
**Arguments**
- `x` — Any type.
- `T` — Destination type. [String](../../sql-reference/data-types/string.md).
- `x` — A value to convert. May be of any type.
- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md).
- `t` — The target data type.
**Returned value**
- Destination type value.
- Converted value.
!!! note "Note"
If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
**Examples**
@ -494,16 +496,16 @@ Query:
```sql
SELECT
CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint,
CAST(toInt8(1), 'Float32') AS cast_int_to_float,
CAST('1', 'UInt32') AS cast_string_to_int;
CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal,
'1'::Int32 AS cast_string_to_int;
```
Result:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┐
│ 255 │ 1.50 │ 1 │
└──────────────────┴───────────────────────┴────────────────────┘
```
Query:

View File

@ -1 +0,0 @@
../../en/development/build-osx.md

View File

@ -0,0 +1,125 @@
---
toc_priority: 65
toc_title: Сборка на Mac OS X
---
# Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах.
## Установка Homebrew {#install-homebrew}
``` bash
$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
## Установка Xcode и инструментов командной строки {#install-xcode-and-command-line-tools}
1. Установите из App Store последнюю версию [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12).
2. Запустите ее, чтобы принять лицензионное соглашение. Необходимые компоненты установятся автоматически.
3. Затем убедитесь, что в системе выбрана последняя версия инструментов командной строки:
``` bash
$ sudo rm -rf /Library/Developer/CommandLineTools
$ sudo xcode-select --install
```
4. Перезагрузитесь.
## Установка компиляторов, инструментов и библиотек {#install-required-compilers-tools-and-libraries}
``` bash
$ brew update
$ brew install cmake ninja libtool gettext llvm gcc
```
## Просмотр исходников ClickHouse {#checkout-clickhouse-sources}
``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://github.com/ClickHouse/ClickHouse.git
```
## Сборка ClickHouse {#build-clickhouse}
Чтобы запустить сборку в компиляторе Xcode's native AppleClang:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
Чтобы запустить сборку в компиляторе Homebrew's vanilla Clang:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
Чтобы собрать с помощью компилятора Homebrew's vanilla GCC:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
## Предупреждения {#caveats}
Если будете запускать `clickhouse-server`, убедитесь, что увеличили системную переменную `maxfiles`.
!!! info "Note"
Вам понадобится команда `sudo`.
1. Создайте файл `/Library/LaunchDaemons/limit.maxfiles.plist` и поместите в него следующее:
``` xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>limit.maxfiles</string>
<key>ProgramArguments</key>
<array>
<string>launchctl</string>
<string>limit</string>
<string>maxfiles</string>
<string>524288</string>
<string>524288</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>ServiceIPC</key>
<false/>
</dict>
</plist>
```
2. Выполните команду:
``` bash
$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
```
3. Перезагрузитесь.
4. Чтобы проверить, как это работает, выполните команду `ulimit -n`.
[Original article](https://clickhouse.tech/docs/en/development/build_osx/) <!--hide-->

View File

@ -20,3 +20,5 @@ toc_title: "Введение"
- [PostgreSQL](../../engines/database-engines/postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md)

View File

@ -49,6 +49,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |

View File

@ -0,0 +1,119 @@
# [экспериментальный] Replicated {#replicated}
Движок основан на движке [Atomic](../../engines/database-engines/atomic.md). Он поддерживает репликацию метаданных через журнал DDL, записываемый в ZooKeeper и выполняемый на всех репликах для данной базы данных.
На одном сервере ClickHouse может одновременно работать и обновляться несколько реплицированных баз данных. Но не может существовать нескольких реплик одной и той же реплицированной базы данных.
## Создание базы данных {#creating-a-database}
``` sql
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
```
**Параметры движка**
- `zoo_path` — путь в ZooKeeper. Один и тот же путь ZooKeeper соответствует одной и той же базе данных.
- `shard_name` — Имя шарда. Реплики базы данных группируются в шарды по имени.
- `replica_name` — Имя реплики. Имена реплик должны быть разными для всех реплик одного и того же шарда.
!!! note "Предупреждение"
Для таблиц [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) если аргументы не заданы, то используются аргументы по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в серверных настройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Макрос `{uuid}` раскрывается в `UUID` таблицы, `{shard}` и `{replica}` — в значения из конфига сервера. В будущем появится возможность использовать значения `shard_name` и `replica_name` аргументов движка базы данных `Replicated`.
## Особенности и рекомендации {#specifics-and-recommendations}
DDL-запросы с базой данных `Replicated` работают похожим образом на [ON CLUSTER](../../sql-reference/distributed-ddl.md) запросы, но с небольшими отличиями.
Сначала DDL-запрос пытается выполниться на инициаторе (том хосте, который изначально получил запрос от пользователя). Если запрос не выполнился, то пользователь сразу получает ошибку, другие хосты не пытаются его выполнить. Если запрос успешно выполнился на инициаторе, то все остальные хосты будут автоматически делать попытки выполнить его.
Инициатор попытается дождаться выполнения запроса на других хостах (не дольше [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) и вернёт таблицу со статусами выполнения запроса на каждом хосте.
Поведение в случае ошибок регулируется настройкой [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode), для `Replicated` лучше выставлять её в `null_status_on_timeout` — т.е. если какие-то хосты не успели выполнить запрос за [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), то вместо исключения для них будет показан статус `NULL` в таблице.
В системной таблице [system.clusters](../../operations/system-tables/clusters.md) есть кластер с именем, как у реплицируемой базы, который состоит из всех реплик базы. Этот кластер обновляется автоматически при создании/удалении реплик, и его можно использовать для [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблиц.
При создании новой реплики базы, эта реплика сама создаёт таблицы. Если реплика долго была недоступна и отстала от лога репликации — она сверяет свои локальные метаданные с актуальными метаданными в ZooKeeper, перекладывает лишние таблицы с данными в отдельную нереплицируемую базу (чтобы случайно не удалить что-нибудь лишнее), создаёт недостающие таблицы, обновляет имена таблиц, если были переименования. Данные реплицируются на уровне `ReplicatedMergeTree`, т.е. если таблица не реплицируемая, то данные реплицироваться не будут (база отвечает только за метаданные).
## Примеры использования {#usage-example}
Создадим реплицируемую базу на трех хостах:
``` sql
node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1');
node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica');
node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}');
```
Выполним DDL-запрос на одном из хостов:
``` sql
CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
Запрос выполнится на всех остальных хостах:
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
```
Кластер в системной таблице `system.clusters`:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
Создадим распределенную таблицу и вставим в нее данные:
``` sql
node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2);
node3 :) INSERT INTO r SELECT * FROM numbers(10);
node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
Добавление реплики:
``` sql
node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
```
Новая реплика автоматически создаст все таблицы, которые есть в базе, а старые реплики перезагрузят из ZooKeeper-а конфигурацию кластера:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
Распределенная таблица также получит данные от нового хоста:
```sql
node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -101,8 +101,8 @@ sudo ./clickhouse install
Для других операционных систем и архитектуры AArch64 сборки ClickHouse предоставляются в виде кросс-компилированного бинарного файла из последнего коммита ветки `master` (с задержкой в несколько часов).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
После скачивания можно воспользоваться `clickhouse client` для подключения к серверу или `clickhouse local` для обработки локальных данных.

View File

@ -1165,12 +1165,14 @@ SELECT * FROM topic1_stream;
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
@ -1218,12 +1220,17 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными.
Тип `DICTIONARY` поддерживается для запросов `INSERT`. Для запросов `SELECT` есть настройка [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary), которая позволяет выводить тип [LowCardinality](../sql-reference/data-types/lowcardinality.md) как `DICTIONARY`.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Arrow `DECIMAL` как `Decimal128`.
@ -1276,8 +1283,10 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.

View File

@ -1204,8 +1204,15 @@ load_balancing = round_robin
Работает для форматов JSONEachRow и TSKV.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
Управляет кавычками при выводе 64-битных или более [целых чисел](../../sql-reference/data-types/int-uint.md) (например, `UInt64` или `Int128`) в формате [JSON](../../interfaces/formats.md#json).
По умолчанию такие числа заключаются в кавычки. Это поведение соответствует большинству реализаций JavaScript.
Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек.
Возможные значения:
- 0 — числа выводятся без кавычек.
- 1 — числа выводятся в кавычках.
Значение по умолчанию: 1.
## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals}
@ -2979,6 +2986,53 @@ SELECT
FROM fuse_tbl
```
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Позволяет создавать базы данных с движком [Replicated](../../engines/database-engines/replicated.md).
Возможные значения:
- 0 — Disabled.
- 1 — Enabled.
Значение по умолчанию: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Устанавливает, как долго начальный DDL-запрос должен ждать, пока реплицированная база данных прецессирует предыдущие записи очереди DDL в секундах.
Возможные значения:
- Положительное целое число.
- 0 — Не ограничено.
Значение по умолчанию: `300`.
## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
Устанавливает тайм-аут для ответов на DDL-запросы от всех хостов в кластере. Если DDL-запрос не был выполнен на всех хостах, ответ будет содержать ошибку тайм-аута, и запрос будет выполнен в асинхронном режиме.
Возможные значения:
- Положительное целое число.
- 0 — Асинхронный режим.
- Отрицательное число — бесконечный тайм-аут.
Значение по умолчанию: `180`.
## distributed_ddl_output_mode {#distributed_ddl_output_mode}
Задает формат результата распределенного DDL-запроса.
Возможные значения:
- `throw` — возвращает набор результатов со статусом выполнения запросов для всех хостов, где завершен запрос. Если запрос не выполнился на некоторых хостах, то будет выброшено исключение. Если запрос еще не закончен на некоторых хостах и таймаут [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) превышен, то выбрасывается исключение `TIMEOUT_EXCEEDED`.
- `none` — идентично `throw`, но распределенный DDL-запрос не возвращает набор результатов.
- `null_status_on_timeout` — возвращает `NULL` в качестве статуса выполнения в некоторых строках набора результатов вместо выбрасывания `TIMEOUT_EXCEEDED`, если запрос не закончен на соответствующих хостах.
- `never_throw` — не выбрасывает исключение и `TIMEOUT_EXCEEDED`, если запрос не удался на некоторых хостах.
Значение по умолчанию: `throw`.
## flatten_nested {#flatten-nested}
Устанавливает формат данных у [вложенных](../../sql-reference/data-types/nested-data-structures/nested.md) столбцов.
@ -3059,3 +3113,14 @@ SETTINGS index_granularity = 8192 │
**Использование**
Если установлено значение `0`, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary}
Позволяет конвертировать тип [LowCardinality](../../sql-reference/data-types/lowcardinality.md) в тип `DICTIONARY` формата [Arrow](../../interfaces/formats.md#data-format-arrow) для запросов `SELECT`.
Возможные значения:
- 0 — тип `LowCardinality` не конвертируется в тип `DICTIONARY`.
- 1 — тип `LowCardinality` конвертируется в тип `DICTIONARY`.
Значение по умолчанию: `0`.

View File

@ -0,0 +1,38 @@
# system.data_skipping_indices {#system-data-skipping-indices}
Содержит информацию о существующих индексах пропуска данных во всех таблицах.
Столбцы:
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
- `name` ([String](../../sql-reference/data-types/string.md)) — имя индекса.
- `type` ([String](../../sql-reference/data-types/string.md)) — тип индекса.
- `expr` ([String](../../sql-reference/data-types/string.md)) — выражение, используемое для вычисления индекса.
- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество гранул в блоке данных.
**Пример**
```sql
SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: default
table: user_actions
name: clicks_idx
type: minmax
expr: clicks
granularity: 1
Row 2:
──────
database: default
table: users
name: contacts_null_idx
type: minmax
expr: assumeNotNull(contacts_null)
granularity: 1
```

View File

@ -4,7 +4,6 @@
Функции:
- `median` — синоним для [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
- `medianDeterministic` — синоним для [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic).
- `medianExact` — синоним для [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact).
@ -31,7 +30,7 @@
Запрос:
``` sql
SELECT medianDeterministic(val, 1) FROM t
SELECT medianDeterministic(val, 1) FROM t;
```
Результат:
@ -41,4 +40,3 @@ SELECT medianDeterministic(val, 1) FROM t
│ 1.5 │
└─────────────────────────────┘
```

View File

@ -23,11 +23,11 @@ LowCardinality(data_type)
Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
При работе со строками, использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
## Пример
Создать таблицу со столбцами типа `LowCardinality`:
Создание таблицы со столбцами типа `LowCardinality`:
```sql
CREATE TABLE lc_t
@ -47,6 +47,7 @@ ORDER BY id
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary)
Функции:
@ -57,4 +58,3 @@ ORDER BY id
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing Clickhouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).

View File

@ -12,9 +12,6 @@ toc_title: Map(key, value)
- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md).
!!! warning "Предупреждение"
Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`.
Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью.
**Примеры**

View File

@ -306,3 +306,51 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
│ [('d','"hello"'),('f','"world"')] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## toJSONString {#tojsonstring}
Сериализует значение в JSON представление. Поддерживаются различные типы данных и вложенные структуры.
По умолчанию 64-битные [целые числа](../../sql-reference/data-types/int-uint.md) и более (например, `UInt64` или `Int128`) заключаются в кавычки. Настройка [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) управляет этим поведением.
Специальные значения `NaN` и `inf` заменяются на `null`. Чтобы они отображались, включите настройку [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals).
Когда сериализуется значение [Enum](../../sql-reference/data-types/enum.md), то функция выводит его имя.
**Синтаксис**
``` sql
toJSONString(value)
```
**Аргументы**
- `value` — значение, которое необходимо сериализовать. Может быть любого типа.
**Возвращаемое значение**
- JSON представление значения.
Тип: [String](../../sql-reference/data-types/string.md).
**Пример**
Первый пример показывает сериализацию [Map](../../sql-reference/data-types/map.md).
Во втором примере есть специальные значения, обернутые в [Tuple](../../sql-reference/data-types/tuple.md).
Запрос:
``` sql
SELECT toJSONString(map('key1', 1, 'key2', 2));
SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1;
```
Результат:
``` text
{"key1":1,"key2":2}
[1.25,null,"nan","inf","-inf",[]]
```
**Смотрите также**
- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)

View File

@ -462,27 +462,29 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
## CAST(x, T) {#type_conversion_function-cast}
Преобразует входное значение `x` в указанный тип данных `T`. В отличии от функции `reinterpret` использует внешнее представление значения `x`.
Поддерживается также синтаксис `CAST(x AS t)`.
!!! warning "Предупреждение"
Если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255.
Преобразует входное значение к указанному типу данных. В отличие от функции [reinterpret](#type_conversion_function-reinterpret) `CAST` пытается представить то же самое значение в новом типе данных. Если преобразование невозможно, то возникает исключение.
Поддерживается несколько вариантов синтаксиса.
**Синтаксис**
``` sql
CAST(x, T)
CAST(x AS t)
x::t
```
**Аргументы**
- `x` — любой тип данных.
- `T` — конечный тип данных. [String](../../sql-reference/data-types/string.md).
- `x` — значение, которое нужно преобразовать. Может быть любого типа.
- `T` — имя типа данных. [String](../../sql-reference/data-types/string.md).
- `t` — тип данных.
**Возвращаемое значение**
- Значение конечного типа данных.
- Преобразованное значение.
!!! note "Примечание"
Если входное значение выходит за границы нового типа, то результат переполняется. Например, `CAST(-1, 'UInt8')` возвращает `255`.
**Примеры**
@ -491,16 +493,16 @@ CAST(x, T)
```sql
SELECT
CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint,
CAST(toInt8(1), 'Float32') AS cast_int_to_float,
CAST('1', 'UInt32') AS cast_string_to_int
CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal,
'1'::Int32 AS cast_string_to_int;
```
Результат:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┐
│ 255 │ 1.50 │ 1 │
└──────────────────┴───────────────────────┴────────────────────┘
```
Запрос:
@ -524,7 +526,7 @@ SELECT
Преобразование в FixedString(N) работает только для аргументов типа [String](../../sql-reference/data-types/string.md) или [FixedString](../../sql-reference/data-types/fixedstring.md).
Поддерживается преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно.
Поддерживается преобразование к типу [Nullable](../../sql-reference/data-types/nullable.md) и обратно.
**Примеры**

View File

@ -17,7 +17,7 @@ toc_title: PARTITION
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — очистить построенные вторичные индексы для заданной партиции;
- [FREEZE PARTITION](#alter_freeze-partition) — создать резервную копию партиции;
- [UNFREEZE PARTITION](#alter_unfreeze-partition) — удалить резервную копию партиции;
- [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера;
- [FETCH PARTITION\|PART](#alter_fetch-partition) — скачать партицию/кусок с другого сервера;
- [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том.
- [UPDATE IN PARTITION](#update-in-partition) — обновить данные внутри партиции по условию.
- [DELETE IN PARTITION](#delete-in-partition) — удалить данные внутри партиции по условию.
@ -209,29 +209,35 @@ ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name
Удаляет с диска "замороженные" партиции с указанным именем. Если секция `PARTITION` опущена, запрос удаляет резервную копию всех партиций сразу.
## FETCH PARTITION {#alter_fetch-partition}
## FETCH PARTITION\|PART {#alter_fetch-partition}
``` sql
ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper'
ALTER TABLE table_name FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
```
Загружает партицию с другого сервера. Этот запрос работает только для реплицированных таблиц.
Запрос выполняет следующее:
1. Загружает партицию с указанного шарда. Путь к шарду задается в секции `FROM` (path-in-zookeeper). Обратите внимание, нужно задавать путь к шарду в ZooKeeper.
1. Загружает партицию/кусок с указанного шарда. Путь к шарду задается в секции `FROM` (path-in-zookeeper). Обратите внимание, нужно задавать путь к шарду в ZooKeeper.
2. Помещает загруженные данные в директорию `detached` таблицы `table_name`. Чтобы прикрепить эти данные к таблице, используйте запрос [ATTACH PARTITION\|PART](#alter_attach-partition).
Например:
1. FETCH PARTITION
``` sql
ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits';
ALTER TABLE users ATTACH PARTITION 201902;
```
2. FETCH PART
``` sql
ALTER TABLE users FETCH PART 201901_2_2_0 FROM '/clickhouse/tables/01-01/visits';
ALTER TABLE users ATTACH PART 201901_2_2_0;
```
Следует иметь в виду:
- Запрос `ALTER TABLE t FETCH PARTITION` не реплицируется. Он загружает партицию в директорию `detached` только на локальном сервере.
- Запрос `ALTER TABLE t FETCH PARTITION|PART` не реплицируется. Он загружает партицию в директорию `detached` только на локальном сервере.
- Запрос `ALTER TABLE t ATTACH` реплицируется — он добавляет данные в таблицу сразу на всех репликах. На одной из реплик данные будут добавлены из директории `detached`, а на других — из соседних реплик.
Перед загрузкой данных система проверяет, существует ли партиция и совпадает ли её структура со структурой таблицы. При этом автоматически выбирается наиболее актуальная реплика среди всех живых реплик.

View File

@ -282,7 +282,7 @@ GRANT INSERT(x,y) ON db.table TO john
- `ALTER MATERIALIZE TTL`. Уровень: `TABLE`. Алиасы: `MATERIALIZE TTL`
- `ALTER SETTINGS`. Уровень: `TABLE`. Алиасы: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING`
- `ALTER MOVE PARTITION`. Уровень: `TABLE`. Алиасы: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART`
- `ALTER FETCH PARTITION`. Уровень: `TABLE`. Алиасы: `FETCH PARTITION`
- `ALTER FETCH PARTITION`. Уровень: `TABLE`. Алиасы: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART`
- `ALTER FREEZE PARTITION`. Уровень: `TABLE`. Алиасы: `FREEZE PARTITION`
- `ALTER VIEW` Уровень: `GROUP`
- `ALTER VIEW REFRESH `. Уровень: `VIEW`. Алиасы: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW`

View File

@ -6,12 +6,12 @@ toc_title: Atomic
# Atomic {#atomic}
It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default.
它支持非阻塞 DROP 和 RENAME TABLE 查询以及原子 EXCHANGE TABLES t1 AND t2 查询。默认情况下使用Atomic数据库引擎。
## Creating a Database {#creating-a-database}
## 创建数据库 {#creating-a-database}
```sql
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->
[原文](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->

View File

@ -1,4 +1,4 @@
# 折叠树 {#table_engine-collapsingmergetree}
# CollapsingMergeTree {#table_engine-collapsingmergetree}
该引擎继承于 [MergeTree](mergetree.md),并在数据块合并算法中添加了折叠行的逻辑。
@ -203,4 +203,4 @@ SELECT * FROM UAct FINAL
这种查询数据的方法是非常低效的。不要在大表中使用它。
[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->
[原文](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->

View File

@ -3,7 +3,7 @@ toc_priority: 37
toc_title: "版本折叠MergeTree"
---
# 版本折叠MergeTree {#versionedcollapsingmergetree}
# VersionedCollapsingMergeTree {#versionedcollapsingmergetree}
这个引擎:

View File

@ -5,6 +5,6 @@ toc_title: 原生接口(TCP)
# 原生接口TCP{#native-interface-tcp}
原生接口用于[命令行客户端](cli.md)用于分布式查询处理期间的服务器间通信以及其他C++程序。可惜的是,原生的ClickHouse协议还没有正式的规范但它可以从ClickHouse[源代码](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)通过拦截和分析TCP流量进行反向工程。
原生接口协议用于[命令行客户端](cli.md)用于分布式查询处理期间的服务器间通信以及其他C++ 程序。不幸的是,原生ClickHouse协议还没有正式的规范但它可以从ClickHouse源代码[从这里开始](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)或通过拦截和分析TCP流量进行逆向工程。
[来源文章](https://clickhouse.tech/docs/zh/interfaces/tcp/) <!--hide-->
[原文](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide-->

View File

@ -57,9 +57,9 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
- 表格预览。
- 自动完成。
### ツ环板-ョツ嘉ッツ偲 {#clickhouse-cli}
### clickhouse-cli {#clickhouse-cli}
[ツ环板-ョツ嘉ッツ偲](https://github.com/hatarist/clickhouse-cli) 是ClickHouse的替代命令行客户端用Python 3编写。
[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) 是ClickHouse的替代命令行客户端用Python 3编写。
特征:
@ -68,15 +68,15 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
- 寻呼机支持数据输出。
- 自定义PostgreSQL类命令。
### ツ暗ェツ氾环催ツ団ツ法ツ人 {#clickhouse-flamegraph}
### clickhouse-flamegraph {#clickhouse-flamegraph}
[clickhouse-flamegraph](https://github.com/Slach/clickhouse-flamegraph) 是一个可视化的专业工具`system.trace_log`如[flamegraph](http://www.brendangregg.com/flamegraphs.html).
## 商业 {#shang-ye}
### ツ环板Softwareョツ嘉ッ {#holistics-software}
### Holistics {#holistics-software}
[整体学](https://www.holistics.io/) 在2019年被Gartner FrontRunners列为可用性最高排名第二的商业智能工具之一。 Holistics是一个基于SQL的全栈数据平台和商业智能工具用于设置您的分析流程。
[Holistics](https://www.holistics.io/) 在2019年被Gartner FrontRunners列为可用性最高排名第二的商业智能工具之一。 Holistics是一个基于SQL的全栈数据平台和商业智能工具用于设置您的分析流程。
特征:

View File

@ -5,9 +5,21 @@ toc_title: "操作"
# 操作 {#operations}
Clickhouse运维手册主要包含下面几部分
ClickHouse操作手册由以下主要部分组成
- 安装要求
- [安装要求](../operations/requirements.md)
- [监控](../operations/monitoring.md)
- [故障排除](../operations/troubleshooting.md)
- [使用建议](../operations/tips.md)
- [更新程序](../operations/update.md)
- [访问权限](../operations/access-rights.md)
- [数据备份](../operations/backup.md)
- [配置文件](../operations/configuration-files.md)
- [配额](../operations/quotas.md)
- [系统表](../operations/system-tables/index.md)
- [服务器配置参数](../operations/server-configuration-parameters/index.md)
- [如何用ClickHouse测试你的硬件](../operations/performance-test.md)
- [设置](../operations/settings/index.md)
- [实用工具](../operations/utilities/index.md)
[原始文章](https://clickhouse.tech/docs/en/operations/) <!--hide-->
[原文](https://clickhouse.tech/docs/en/operations/) <!--hide-->

View File

@ -1,13 +1,8 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 42
toc_title: mysql
---
# mysql {#mysql}
允许 `SELECT` 要对存储在远程MySQL服务器上的数据执行的查询。
允许对存储在远程MySQL服务器上的数据执行`SELECT`和`INSERT`查询。
**语法**
``` sql
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
@ -15,31 +10,44 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
**参数**
- `host:port` — MySQL server address.
- `host:port` — MySQL服务器地址.
- `database`Remote database name.
- `database`远程数据库名称.
- `table`Remote table name.
- `table`远程表名称.
- `user` — MySQL user.
- `user` — MySQL用户.
- `password`User password.
- `password`用户密码.
- `replace_query`Flag that converts `INSERT INTO` 查询到 `REPLACE INTO`. 如果 `replace_query=1`,查询被替换。
- `replace_query`将INSERT INTO` 查询转换为 `REPLACE INTO`的标志。如果 `replace_query=1`,查询被替换。
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` 表达式被添加`INSERT` 查询。
- `on_duplicate_clause` — 添加 `ON DUPLICATE KEY on_duplicate_clause` 表达式到 `INSERT` 查询。明确规定只能使用 `replace_query = 0` 如果你同时设置replace_query = 1`和`on_duplicate_clause`ClickHouse将产生异常。
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
示例:`INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`
To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception.
`on_duplicate_clause`这里是`UPDATE c2 = c2 + 1`。请查阅MySQL文档来找到可以和`ON DUPLICATE KEY`一起使用的 `on_duplicate_clause`子句。
简单 `WHERE` 条款如 `=, !=, >, >=, <, <=` 当前在MySQL服务器上执行
简单`WHERE` 子句如 `=, !=, >, >=, <, <=` 将即时在MySQL服务器上执行。其余的条件和 `LIMIT` 只有在对MySQL的查询完成后才会在ClickHouse中执行采样约束
其余的条件和 `LIMIT` 只有在对MySQL的查询完成后才会在ClickHouse中执行采样约束。
支持使用`|`并列进行多副本查询,示例如下:
```sql
SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
```sql
SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
**返回值**
与原始MySQL表具有相同列的table对象。
与原始MySQL表具有相同列的表对象。
!!! note "注意"
在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。
## 用法示例 {#usage-example}
@ -66,7 +74,7 @@ mysql> select * from test;
1 row in set (0,00 sec)
```
从ClickHouse中选择数据:
从ClickHouse中查询数据:
``` sql
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
@ -78,6 +86,21 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
└────────┴──────────────┴───────┴────────────────┘
```
替换和插入:
```sql
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
```text
┌─int_id─┬─float─┐
│ 1 │ 3 │
│ 2 │ 4 │
└────────┴───────┘
```
## 另请参阅 {#see-also}
- [MySQL 表引擎](../../engines/table-engines/integrations/mysql.md)

View File

@ -477,17 +477,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
if (ThreadFuzzer::instance().isEffective())
LOG_WARNING(log, "ThreadFuzzer is enabled. Application will run slowly and unstable.");
#if !defined(NDEBUG) || !defined(__OPTIMIZE__)
LOG_WARNING(log, "Server was built in debug mode. It will work slowly.");
#endif
#if defined(SANITIZER)
LOG_WARNING(log, "Server was built with sanitizer. It will work slowly.");
#endif
/** Context contains all that query execution is dependent:
* settings, available functions, data types, aggregate functions, databases, ...
*/
@ -497,6 +486,18 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->makeGlobalContext();
global_context->setApplicationType(Context::ApplicationType::SERVER);
#if !defined(NDEBUG) || !defined(__OPTIMIZE__)
global_context->addWarningMessage("Server was built in debug mode. It will work slowly.");
#endif
if (ThreadFuzzer::instance().isEffective())
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
#if defined(SANITIZER)
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
@ -552,8 +553,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
{
/// Program is run under debugger. Modification of it's binary image is ok for breakpoints.
LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).",
calculated_binary_hash);
global_context->addWarningMessage(
fmt::format("Server is run under debugger and its binary image is modified (most likely with breakpoints).",
calculated_binary_hash)
);
}
else
{
@ -636,7 +639,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
else
{
LOG_WARNING(log, message);
global_context->addWarningMessage(message);
}
}

View File

@ -9,7 +9,7 @@
Do not use any JavaScript or CSS frameworks or preprocessors.
This HTML page should not require any build systems (node.js, npm, gulp, etc.)
This HTML page should not be minified, instead it should be reasonably minimalistic by itself.
This HTML page should not load any external resources
This HTML page should not load any external resources on load.
(CSS and JavaScript must be embedded directly to the page. No external fonts or images should be loaded).
This UI should look as lightweight, clean and fast as possible.
All UI elements must be aligned in pixel-perfect way.
@ -343,13 +343,18 @@
/// Save query in history only if it is different.
let previous_query = '';
const current_url = new URL(window.location);
const server_address = current_url.searchParams.get('url');
if (server_address) {
document.getElementById('url').value = server_address;
} else if (location.protocol != 'file:') {
/// Substitute the address of the server where the page is served.
if (location.protocol != 'file:') {
document.getElementById('url').value = location.origin;
}
/// Substitute user name if it's specified in the query string
let user_from_url = (new URL(window.location)).searchParams.get('user');
const user_from_url = current_url.searchParams.get('user');
if (user_from_url) {
document.getElementById('user').value = user_from_url;
}
@ -361,7 +366,9 @@
let user = document.getElementById('user').value;
let password = document.getElementById('password').value;
let url = document.getElementById('url').value +
let server_address = document.getElementById('url').value;
let url = server_address +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
'&user=' + encodeURIComponent(user) +
@ -390,11 +397,18 @@
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
};
let title = "ClickHouse Query: " + query;
let url = window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query);
let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
if (server_address != location.origin) {
/// Save server's address in URL if it's not identical to the address of the play UI.
history_url += '&url=' + encodeURIComponent(server_address);
}
history_url += '#' + window.btoa(query);
if (previous_query == '') {
history.replaceState(state, title, url);
history.replaceState(state, title, history_url);
} else {
history.pushState(state, title, url);
history.pushState(state, title, history_url);
}
document.title = title;
previous_query = query;
@ -599,10 +613,16 @@
}
/// Huge JS libraries should be loaded only if needed.
function loadJS(src) {
function loadJS(src, integrity) {
return new Promise((resolve, reject) => {
const script = document.createElement('script');
script.src = src;
if (integrity) {
script.crossOrigin = 'anonymous';
script.integrity = integrity;
} else {
console.warn('no integrity for', src)
}
script.addEventListener('load', function() { resolve(true); });
document.head.appendChild(script);
});
@ -613,10 +633,14 @@
if (load_dagre_promise) { return load_dagre_promise; }
load_dagre_promise = Promise.all([
loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js'),
loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js'),
loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js'),
loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0'),
loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js',
'sha384-2IH3T69EIKYC4c+RXZifZRvaH5SRUdacJW7j6HtE5rQbvLhKKdawxq6vpIzJ7j9M'),
loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js',
'sha384-Q7oatU+b+y0oTkSoiRH9wTLH6sROySROCILZso/AbMMm9uKeq++r8ujD4l4f+CWj'),
loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js',
'sha384-9N1ty7Yz7VKL3aJbOk+8ParYNW8G5W+MvxEfFL9G7CRYPmkHI9gJqyAfSI/8190W'),
loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0',
'sha384-S+Kf0r6YzKIhKA8d1k2/xtYv+j0xYUU3E7+5YLrcPVab6hBh/r1J6cq90OXhw80u'),
]);
return load_dagre_promise;

View File

@ -64,7 +64,12 @@ public:
std::lock_guard lock{mutex};
auto x = cache.get(params);
if (x)
{
if ((*x)->getUser())
return *x;
/// No user, probably the user has been dropped while it was in the cache.
cache.remove(params);
}
auto res = std::shared_ptr<ContextAccess>(new ContextAccess(manager, params));
cache.add(params, res);
return res;

View File

@ -655,7 +655,7 @@ private:
for (auto & [lhs_childname, lhs_child] : *children)
{
if (!rhs.tryGetChild(lhs_childname))
lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags();
lhs_child.addGrantsRec(rhs.flags);
}
}
}
@ -673,7 +673,7 @@ private:
for (auto & [lhs_childname, lhs_child] : *children)
{
if (!rhs.tryGetChild(lhs_childname))
lhs_child.flags &= rhs.flags;
lhs_child.removeGrantsRec(~rhs.flags);
}
}
}
@ -1041,17 +1041,15 @@ void AccessRights::makeIntersection(const AccessRights & other)
auto helper = [](std::unique_ptr<Node> & root_node, const std::unique_ptr<Node> & other_root_node)
{
if (!root_node)
return;
if (!other_root_node)
{
if (other_root_node)
root_node = std::make_unique<Node>(*other_root_node);
root_node = nullptr;
return;
}
if (other_root_node)
{
root_node->makeIntersection(*other_root_node);
if (!root_node->flags && !root_node->children)
root_node = nullptr;
}
};
helper(root, other.root);
helper(root_with_grant_option, other.root_with_grant_option);

View File

@ -173,6 +173,7 @@ enum class AccessType
M(MONGO, "", GLOBAL, SOURCES) \
M(MYSQL, "", GLOBAL, SOURCES) \
M(POSTGRES, "", GLOBAL, SOURCES) \
M(SQLITE, "", GLOBAL, SOURCES) \
M(ODBC, "", GLOBAL, SOURCES) \
M(JDBC, "", GLOBAL, SOURCES) \
M(HDFS, "", GLOBAL, SOURCES) \

View File

@ -163,11 +163,10 @@ void ContextAccess::setUser(const UserPtr & user_) const
if (!user)
{
/// User has been dropped.
auto nothing_granted = std::make_shared<AccessRights>();
access = nothing_granted;
access_with_implicit = nothing_granted;
subscription_for_user_change = {};
subscription_for_roles_changes = {};
access = nullptr;
access_with_implicit = nullptr;
enabled_roles = nullptr;
roles_info = nullptr;
enabled_row_policies = nullptr;
@ -252,32 +251,45 @@ String ContextAccess::getUserName() const
std::shared_ptr<const EnabledRolesInfo> ContextAccess::getRolesInfo() const
{
std::lock_guard lock{mutex};
if (roles_info)
return roles_info;
static const auto no_roles = std::make_shared<EnabledRolesInfo>();
return no_roles;
}
std::shared_ptr<const EnabledRowPolicies> ContextAccess::getEnabledRowPolicies() const
{
std::lock_guard lock{mutex};
if (enabled_row_policies)
return enabled_row_policies;
static const auto no_row_policies = std::make_shared<EnabledRowPolicies>();
return no_row_policies;
}
ASTPtr ContextAccess::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition) const
{
std::lock_guard lock{mutex};
return enabled_row_policies ? enabled_row_policies->getCondition(database, table_name, index, extra_condition) : nullptr;
if (enabled_row_policies)
return enabled_row_policies->getCondition(database, table_name, index, extra_condition);
return nullptr;
}
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
if (enabled_quota)
return enabled_quota;
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
}
std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
{
std::lock_guard lock{mutex};
return enabled_quota ? enabled_quota->getUsage() : std::optional<QuotaUsage>{};
if (enabled_quota)
return enabled_quota->getUsage();
return {};
}
@ -288,7 +300,7 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
auto full_access = std::shared_ptr<ContextAccess>(new ContextAccess);
full_access->is_full_access = true;
full_access->access = std::make_shared<AccessRights>(AccessRights::getFullAccess());
full_access->enabled_quota = EnabledQuota::getUnlimitedQuota();
full_access->access_with_implicit = std::make_shared<AccessRights>(addImplicitAccessRights(*full_access->access));
return full_access;
}();
return res;
@ -298,28 +310,40 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
std::shared_ptr<const Settings> ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
return enabled_settings ? enabled_settings->getSettings() : nullptr;
if (enabled_settings)
return enabled_settings->getSettings();
static const auto everything_by_default = std::make_shared<Settings>();
return everything_by_default;
}
std::shared_ptr<const SettingsConstraints> ContextAccess::getSettingsConstraints() const
{
std::lock_guard lock{mutex};
return enabled_settings ? enabled_settings->getConstraints() : nullptr;
if (enabled_settings)
return enabled_settings->getConstraints();
static const auto no_constraints = std::make_shared<SettingsConstraints>();
return no_constraints;
}
std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
{
std::lock_guard lock{mutex};
if (access)
return access;
static const auto nothing_granted = std::make_shared<AccessRights>();
return nothing_granted;
}
std::shared_ptr<const AccessRights> ContextAccess::getAccessRightsWithImplicit() const
{
std::lock_guard lock{mutex};
if (access_with_implicit)
return access_with_implicit;
static const auto nothing_granted = std::make_shared<AccessRights>();
return nothing_granted;
}
@ -551,7 +575,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i)
{
const UUID & role_id = *it;
if (info && info->enabled_roles_with_admin_option.count(role_id))
if (info->enabled_roles_with_admin_option.count(role_id))
continue;
if (throw_if_denied)
@ -560,7 +584,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
if (!role_name)
role_name = "ID {" + toString(role_id) + "}";
if (info && info->enabled_roles.count(role_id))
if (info->enabled_roles.count(role_id))
show_error("Not enough privileges. "
"Role " + backQuote(*role_name) + " is granted, but without ADMIN option. "
"To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.",

View File

@ -71,11 +71,9 @@ public:
String getUserName() const;
/// Returns information about current and enabled roles.
/// The function can return nullptr.
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
/// Returns information about enabled row policies.
/// The function can return nullptr.
std::shared_ptr<const EnabledRowPolicies> getEnabledRowPolicies() const;
/// Returns the row policy filter for a specified table.
@ -83,16 +81,13 @@ public:
ASTPtr getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition = nullptr) const;
/// Returns the quota to track resource consumption.
/// The function returns nullptr if no tracking or limitation is needed.
std::shared_ptr<const EnabledQuota> getQuota() const;
std::optional<QuotaUsage> getQuotaUsage() const;
/// Returns the default settings, i.e. the settings to apply on user's login.
/// The function returns nullptr if it's no need to apply settings.
std::shared_ptr<const Settings> getDefaultSettings() const;
/// Returns the settings' constraints.
/// The function returns nullptr if there are no constraints.
std::shared_ptr<const SettingsConstraints> getSettingsConstraints() const;
/// Returns the current access rights.

View File

@ -12,8 +12,11 @@ size_t EnabledRowPolicies::Hash::operator()(const MixedConditionKey & key) const
}
EnabledRowPolicies::EnabledRowPolicies(const Params & params_)
: params(params_)
EnabledRowPolicies::EnabledRowPolicies() : params()
{
}
EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_)
{
}

View File

@ -32,6 +32,7 @@ public:
friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); }
};
EnabledRowPolicies();
~EnabledRowPolicies();
using ConditionType = RowPolicy::ConditionType;

View File

@ -18,6 +18,8 @@ namespace ErrorCodes
}
SettingsConstraints::SettingsConstraints() = default;
SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_)
{
}
@ -199,10 +201,13 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
}
};
if (manager)
{
if (reaction == THROW_ON_VIOLATION)
manager->checkSettingNameIsAllowed(setting_name);
else if (!manager->isSettingNameAllowed(setting_name))
return false;
}
Field current_value, new_value;
if (current_settings.tryGet(setting_name, current_value))

View File

@ -51,6 +51,7 @@ class AccessControlManager;
class SettingsConstraints
{
public:
SettingsConstraints();
SettingsConstraints(const AccessControlManager & manager_);
SettingsConstraints(const SettingsConstraints & src);
SettingsConstraints & operator =(const SettingsConstraints & src);

View File

@ -0,0 +1,94 @@
#include <gtest/gtest.h>
#include <Access/AccessRights.h>
using namespace DB;
TEST(AccessRights, Union)
{
AccessRights lhs, rhs;
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
rhs.grant(AccessType::SELECT, "db2");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*");
lhs.clear();
rhs.clear();
rhs.grant(AccessType::SELECT, "db2");
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT);
rhs.grant(AccessType::SELECT, "db1", "tb1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION");
lhs = {};
rhs = {};
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}
TEST(AccessRights, Intersection)
{
AccessRights lhs, rhs;
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
rhs.grant(AccessType::SELECT, "db2");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*");
lhs.clear();
rhs.clear();
lhs.grant(AccessType::SELECT, "db2");
rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT);
rhs.grant(AccessType::SELECT, "db1", "tb1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*");
}

View File

@ -101,6 +101,24 @@ struct AggregateFunctionSumData
{
const auto * end = ptr + count;
if constexpr (
(is_integer_v<T> && !is_big_int_v<T>)
|| (IsDecimalNumber<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
{
/// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
/// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
T local_sum{};
while (ptr < end)
{
T multiplier = !*null_map;
Impl::add(local_sum, *ptr * multiplier);
++ptr;
++null_map;
}
Impl::add(sum, local_sum);
return;
}
if constexpr (std::is_floating_point_v<T>)
{
constexpr size_t unroll_count = 128 / sizeof(T);

View File

@ -76,6 +76,10 @@ add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
if (USE_SQLITE)
add_headers_and_sources(dbms Databases/SQLite)
endif()
if(USE_RDKAFKA)
add_headers_and_sources(dbms Storages/Kafka)
endif()
@ -415,6 +419,11 @@ if (USE_AWS_S3)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
endif()
if (USE_S2_GEOMETRY)
dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR})
endif()
if (USE_BROTLI)
target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
@ -425,6 +434,10 @@ if (USE_AMQPCPP)
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${AMQPCPP_INCLUDE_DIR})
endif()
if (USE_SQLITE)
dbms_target_link_libraries(PUBLIC sqlite)
endif()
if (USE_CASSANDRA)
dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})

View File

@ -109,11 +109,23 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
/// It should not affect client address checking, since client cannot connect from IPv6 address
/// if server has no IPv6 addresses.
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
DNSResolver::IPAddresses addresses;
try
{
#if defined(ARCADIA_BUILD)
auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
#else
auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
#endif
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message());
addresses.clear();
}
if (addresses.empty())
throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);

View File

@ -558,6 +558,9 @@
M(588, DISTRIBUTED_BROKEN_BATCH_INFO) \
M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \
M(590, CANNOT_SYSCONF) \
M(591, SQLITE_ENGINE_ERROR) \
M(592, DATA_ENCRYPTION_ERROR) \
M(593, ZERO_COPY_REPLICATION_ERROR) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \

View File

@ -224,7 +224,7 @@
M(PerfLocalMemoryReferences, "Local NUMA node memory reads") \
M(PerfLocalMemoryMisses, "Local NUMA node memory read misses") \
\
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \

View File

@ -375,9 +375,13 @@ void Block::setColumn(size_t position, ColumnWithTypeAndName && column)
throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Block::setColumn(), max position {}",
position, toString(data.size()));
data[position].name = std::move(column.name);
data[position].type = std::move(column.type);
data[position].column = std::move(column.column);
if (data[position].name != column.name)
{
index_by_name.erase(data[position].name);
index_by_name.emplace(column.name, position);
}
data[position] = std::move(column);
}
@ -436,7 +440,7 @@ Block Block::sortColumns() const
Block sorted_block;
/// std::unordered_map (index_by_name) cannot be used to guarantee the sort order
std::vector<decltype(index_by_name.begin())> sorted_index_by_name(index_by_name.size());
std::vector<IndexByName::const_iterator> sorted_index_by_name(index_by_name.size());
{
size_t i = 0;
for (auto it = index_by_name.begin(); it != index_by_name.end(); ++it)

View File

@ -26,13 +26,14 @@ namespace ErrorCodes
MySQLClient::MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_)
: host(host_), port(port_), user(user_), password(std::move(password_))
{
client_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION;
mysql_context.client_capabilities = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION;
}
MySQLClient::MySQLClient(MySQLClient && other)
: host(std::move(other.host)), port(other.port), user(std::move(other.user)), password(std::move(other.password))
, client_capability_flags(other.client_capability_flags)
, mysql_context(other.mysql_context)
{
mysql_context.sequence_id = 0;
}
void MySQLClient::connect()
@ -56,7 +57,7 @@ void MySQLClient::connect()
in = std::make_shared<ReadBufferFromPocoSocket>(*socket);
out = std::make_shared<WriteBufferFromPocoSocket>(*socket);
packet_endpoint = std::make_shared<PacketEndpoint>(*in, *out, seq);
packet_endpoint = mysql_context.makeEndpoint(*in, *out);
handshake();
}
@ -68,7 +69,7 @@ void MySQLClient::disconnect()
socket->close();
socket = nullptr;
connected = false;
seq = 0;
mysql_context.sequence_id = 0;
}
/// https://dev.mysql.com/doc/internals/en/connection-phase-packets.html
@ -87,10 +88,10 @@ void MySQLClient::handshake()
String auth_plugin_data = native41.getAuthPluginData();
HandshakeResponse handshake_response(
client_capability_flags, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password);
mysql_context.client_capabilities, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password);
packet_endpoint->sendPacket<HandshakeResponse>(handshake_response, true);
ResponsePacket packet_response(client_capability_flags, true);
ResponsePacket packet_response(mysql_context.client_capabilities, true);
packet_endpoint->receivePacket(packet_response);
packet_endpoint->resetSequenceId();
@ -105,7 +106,7 @@ void MySQLClient::writeCommand(char command, String query)
WriteCommand write_command(command, query);
packet_endpoint->sendPacket<WriteCommand>(write_command, true);
ResponsePacket packet_response(client_capability_flags);
ResponsePacket packet_response(mysql_context.client_capabilities);
packet_endpoint->receivePacket(packet_response);
switch (packet_response.getType())
{
@ -124,7 +125,7 @@ void MySQLClient::registerSlaveOnMaster(UInt32 slave_id)
RegisterSlave register_slave(slave_id);
packet_endpoint->sendPacket<RegisterSlave>(register_slave, true);
ResponsePacket packet_response(client_capability_flags);
ResponsePacket packet_response(mysql_context.client_capabilities);
packet_endpoint->receivePacket(packet_response);
packet_endpoint->resetSequenceId();
if (packet_response.getType() == PACKET_ERR)

View File

@ -45,9 +45,7 @@ private:
String password;
bool connected = false;
UInt32 client_capability_flags = 0;
uint8_t seq = 0;
MySQLWireContext mysql_context;
const UInt8 charset_utf8 = 33;
const String mysql_native_password = "mysql_native_password";

View File

@ -68,4 +68,15 @@ String PacketEndpoint::packetToText(const String & payload)
}
MySQLProtocol::PacketEndpointPtr MySQLWireContext::makeEndpoint(WriteBuffer & out)
{
return MySQLProtocol::PacketEndpoint::create(out, sequence_id);
}
MySQLProtocol::PacketEndpointPtr MySQLWireContext::makeEndpoint(ReadBuffer & in, WriteBuffer & out)
{
return MySQLProtocol::PacketEndpoint::create(in, out, sequence_id);
}
}

View File

@ -5,6 +5,7 @@
#include "IMySQLReadPacket.h"
#include "IMySQLWritePacket.h"
#include "IO/MySQLPacketPayloadReadBuffer.h"
#include <common/shared_ptr_helper.h>
namespace DB
{
@ -15,19 +16,13 @@ namespace MySQLProtocol
/* Writes and reads packets, keeping sequence-id.
* Throws ProtocolError, if packet with incorrect sequence-id was received.
*/
class PacketEndpoint
class PacketEndpoint : public shared_ptr_helper<PacketEndpoint>
{
public:
uint8_t & sequence_id;
ReadBuffer * in;
WriteBuffer * out;
/// For writing.
PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_);
/// For reading and writing.
PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_);
MySQLPacketPayloadReadBuffer getPayload();
void receivePacket(IMySQLReadPacket & packet);
@ -48,8 +43,29 @@ public:
/// Converts packet to text. Is used for debug output.
static String packetToText(const String & payload);
protected:
/// For writing.
PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_);
/// For reading and writing.
PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_);
friend struct shared_ptr_helper<PacketEndpoint>;
};
using PacketEndpointPtr = std::shared_ptr<PacketEndpoint>;
}
struct MySQLWireContext
{
uint8_t sequence_id = 0;
uint32_t client_capabilities = 0;
size_t max_packet_size = 0;
MySQLProtocol::PacketEndpointPtr makeEndpoint(WriteBuffer & out);
MySQLProtocol::PacketEndpointPtr makeEndpoint(ReadBuffer & in, WriteBuffer & out);
};
}
}

View File

@ -1,4 +1,7 @@
#include "Connection.h"
#if USE_LIBPQXX
#include <common/logger_useful.h>
namespace postgres
@ -72,3 +75,5 @@ void Connection::connect()
updateConnection();
}
}
#endif

View File

@ -1,5 +1,11 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include <boost/noncopyable.hpp>
@ -45,3 +51,5 @@ private:
Poco::Logger * log;
};
}
#endif

View File

@ -1,5 +1,11 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include <common/BorrowedObjectPool.h>
@ -35,3 +41,5 @@ private:
using ConnectionHolderPtr = std::unique_ptr<ConnectionHolder>;
}
#endif

View File

@ -1,4 +1,7 @@
#include "PoolWithFailover.h"
#if USE_LIBPQXX
#include "Utils.h"
#include <Common/parseRemoteDescription.h>
#include <Common/Exception.h>
@ -136,3 +139,5 @@ ConnectionHolderPtr PoolWithFailover::get()
throw DB::Exception(DB::ErrorCodes::POSTGRESQL_CONNECTION_FAILURE, "Unable to connect to any of the replicas");
}
}
#endif

View File

@ -1,5 +1,12 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include "ConnectionHolder.h"
#include <mutex>
#include <Poco/Util/AbstractConfiguration.h>
@ -63,3 +70,5 @@ private:
using PoolWithFailoverPtr = std::shared_ptr<PoolWithFailover>;
}
#endif

View File

@ -1,4 +1,7 @@
#include "Utils.h"
#if USE_LIBPQXX
#include <IO/Operators.h>
namespace postgres
@ -17,3 +20,5 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S
}
}
#endif

View File

@ -1,5 +1,11 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include "Connection.h"
@ -15,3 +21,5 @@ namespace postgres
{
ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password);
}
#endif

View File

@ -9,6 +9,7 @@
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
@ -99,7 +100,16 @@ void insertPostgreSQLValue(
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDateTime64:[[fallthrough]];
case ExternalResultDescription::ValueType::vtDateTime64:
{
ReadBufferFromString in(value);
DateTime64 time = 0;
readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal64: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal128: [[fallthrough]];
@ -201,6 +211,18 @@ void preparePostgreSQLArrayInfo(
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
if (time < 0)
time = 0;
return time;
};
else if (which.isDateTime64())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
DateTime64 time = 0;
readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(nested.get())->getTimeZone());
if (time < 0)
time = 0;
return time;
};
else if (which.isDecimal32())

View File

@ -482,6 +482,8 @@ class IColumn;
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
\
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
\
@ -524,6 +526,7 @@ class IColumn;
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
\
M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \
\

View File

@ -13,5 +13,6 @@
#cmakedefine01 USE_LDAP
#cmakedefine01 USE_ROCKSDB
#cmakedefine01 USE_LIBPQXX
#cmakedefine01 USE_SQLITE
#cmakedefine01 USE_NURAFT
#cmakedefine01 USE_KRB5

View File

@ -31,6 +31,10 @@ SRCS(
MySQL/PacketsProtocolText.cpp
MySQL/PacketsReplication.cpp
NamesAndTypes.cpp
PostgreSQL/Connection.cpp
PostgreSQL/PoolWithFailover.cpp
PostgreSQL/Utils.cpp
PostgreSQL/insertPostgreSQLValue.cpp
PostgreSQLProtocol.cpp
QueryProcessingStage.cpp
Settings.cpp

View File

@ -0,0 +1,163 @@
#include "SQLiteBlockInputStream.h"
#if USE_SQLITE
#include <common/range.h>
#include <common/logger_useful.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int SQLITE_ENGINE_ERROR;
}
SQLiteBlockInputStream::SQLiteBlockInputStream(
SQLitePtr sqlite_db_,
const String & query_str_,
const Block & sample_block,
const UInt64 max_block_size_)
: query_str(query_str_)
, max_block_size(max_block_size_)
, sqlite_db(std::move(sqlite_db_))
{
description.init(sample_block);
}
void SQLiteBlockInputStream::readPrefix()
{
sqlite3_stmt * compiled_stmt = nullptr;
int status = sqlite3_prepare_v2(sqlite_db.get(), query_str.c_str(), query_str.size() + 1, &compiled_stmt, nullptr);
if (status != SQLITE_OK)
throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR,
"Cannot prepate sqlite statement. Status: {}. Message: {}",
status, sqlite3_errstr(status));
compiled_statement = std::unique_ptr<sqlite3_stmt, StatementDeleter>(compiled_stmt, StatementDeleter());
}
Block SQLiteBlockInputStream::readImpl()
{
if (!compiled_statement)
return Block();
MutableColumns columns = description.sample_block.cloneEmptyColumns();
size_t num_rows = 0;
while (true)
{
int status = sqlite3_step(compiled_statement.get());
if (status == SQLITE_BUSY)
{
continue;
}
else if (status == SQLITE_DONE)
{
compiled_statement.reset();
break;
}
else if (status != SQLITE_ROW)
{
throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR,
"Expected SQLITE_ROW status, but got status {}. Error: {}, Message: {}",
status, sqlite3_errstr(status), sqlite3_errmsg(sqlite_db.get()));
}
int column_count = sqlite3_column_count(compiled_statement.get());
for (const auto idx : collections::range(0, column_count))
{
const auto & sample = description.sample_block.getByPosition(idx);
if (sqlite3_column_type(compiled_statement.get(), idx) == SQLITE_NULL)
{
insertDefaultSQLiteValue(*columns[idx], *sample.column);
continue;
}
if (description.types[idx].second)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
insertValue(column_nullable.getNestedColumn(), description.types[idx].first, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
{
insertValue(*columns[idx], description.types[idx].first, idx);
}
}
if (++num_rows == max_block_size)
break;
}
return description.sample_block.cloneWithColumns(std::move(columns));
}
void SQLiteBlockInputStream::readSuffix()
{
if (compiled_statement)
compiled_statement.reset();
}
void SQLiteBlockInputStream::insertValue(IColumn & column, const ExternalResultDescription::ValueType type, size_t idx)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx));
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx));
break;
case ValueType::vtUInt64:
/// There is no uint64 in sqlite3, only int and int64
assert_cast<ColumnUInt64 &>(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(sqlite3_column_double(compiled_statement.get(), idx));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(sqlite3_column_double(compiled_statement.get(), idx));
break;
default:
const char * data = reinterpret_cast<const char *>(sqlite3_column_text(compiled_statement.get(), idx));
int len = sqlite3_column_bytes(compiled_statement.get(), idx);
assert_cast<ColumnString &>(column).insertData(data, len);
break;
}
}
}
#endif

View File

@ -0,0 +1,62 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_SQLITE
#include <Core/ExternalResultDescription.h>
#include <DataStreams/IBlockInputStream.h>
#include <sqlite3.h> // Y_IGNORE
namespace DB
{
class SQLiteBlockInputStream : public IBlockInputStream
{
using SQLitePtr = std::shared_ptr<sqlite3>;
public:
SQLiteBlockInputStream(SQLitePtr sqlite_db_,
const String & query_str_,
const Block & sample_block,
UInt64 max_block_size_);
String getName() const override { return "SQLite"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
void insertDefaultSQLiteValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
using ValueType = ExternalResultDescription::ValueType;
struct StatementDeleter
{
void operator()(sqlite3_stmt * stmt) { sqlite3_finalize(stmt); }
};
void readPrefix() override;
Block readImpl() override;
void readSuffix() override;
void insertValue(IColumn & column, const ExternalResultDescription::ValueType type, size_t idx);
String query_str;
UInt64 max_block_size;
ExternalResultDescription description;
SQLitePtr sqlite_db;
std::unique_ptr<sqlite3_stmt, StatementDeleter> compiled_statement;
};
}
#endif

View File

@ -41,6 +41,7 @@ SRCS(
RemoteBlockOutputStream.cpp
RemoteQueryExecutor.cpp
RemoteQueryExecutorReadContext.cpp
SQLiteBlockInputStream.cpp
SizeLimits.cpp
SquashingBlockInputStream.cpp
SquashingBlockOutputStream.cpp

View File

@ -42,11 +42,23 @@ public:
return it;
}
/// throws exception if value is not valid
const StringRef & getNameForValue(const T & value) const
{
return findByValue(value)->second;
}
/// returns false if value is not valid
bool getNameForValue(const T & value, StringRef & result) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
return false;
result = it->second;
return true;
}
T getValue(StringRef field_name, bool try_treat_as_id = false) const;
template <typename TValues>

View File

@ -29,7 +29,7 @@ namespace ErrorCodes
static inline bool typeIsSigned(const IDataType & type)
{
WhichDataType data_type(type);
return data_type.isNativeInt() || data_type.isFloat();
return data_type.isNativeInt() || data_type.isFloat() || data_type.isEnum();
}
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type)
@ -57,6 +57,10 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDa
return builder.getFloatTy();
else if (data_type.isFloat64())
return builder.getDoubleTy();
else if (data_type.isEnum8())
return builder.getInt8Ty();
else if (data_type.isEnum16())
return builder.getInt16Ty();
return nullptr;
}
@ -109,7 +113,7 @@ static inline bool canBeNativeType(const IDataType & type)
return canBeNativeType(*data_type_nullable.getNestedType());
}
return data_type.isNativeInt() || data_type.isNativeUInt() || data_type.isFloat() || data_type.isDate();
return data_type.isNativeInt() || data_type.isNativeUInt() || data_type.isFloat() || data_type.isDate() || data_type.isEnum();
}
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type)
@ -266,7 +270,7 @@ static inline llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builde
{
return llvm::ConstantInt::get(type, column.getUInt(index));
}
else if (column_data_type.isNativeInt())
else if (column_data_type.isNativeInt() || column_data_type.isEnum())
{
return llvm::ConstantInt::get(type, column.getInt(index));
}

View File

@ -1,17 +1,17 @@
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseDictionary.h>
#include <Databases/DatabaseLazy.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <Common/Macros.h>
#include <filesystem>
@ -40,6 +40,10 @@
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#endif
#if USE_SQLITE
#include <Databases/SQLite/DatabaseSQLite.h>
#endif
namespace fs = std::filesystem;
namespace DB
@ -100,7 +104,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const UUID & uuid = create.uuid;
bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "Lazy" ||
engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL";
engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL" || engine_name == "SQLite";
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
@ -299,6 +303,22 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
}
#endif
#if USE_SQLITE
else if (engine_name == "SQLite")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 1)
throw Exception("SQLite database requires 1 argument: database path", ErrorCodes::BAD_ARGUMENTS);
const auto & arguments = engine->arguments->children;
String database_path = safeGetLiteralValue<String>(arguments[0], "SQLite");
return std::make_shared<DatabaseSQLite>(context, engine_define, database_path);
}
#endif
throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);

View File

@ -9,7 +9,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <Common/quoteString.h>
@ -71,7 +71,7 @@ static DataTypePtr convertPostgreSQLDataType(String & type, const std::function<
else if (type == "bigserial")
res = std::make_shared<DataTypeUInt64>();
else if (type.starts_with("timestamp"))
res = std::make_shared<DataTypeDateTime>();
res = std::make_shared<DataTypeDateTime64>(6);
else if (type == "date")
res = std::make_shared<DataTypeDate>();
else if (type.starts_with("numeric"))

View File

@ -0,0 +1,215 @@
#include "DatabaseSQLite.h"
#if USE_SQLITE
#include <common/logger_useful.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <Databases/SQLite/fetchSQLiteTableStructure.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Interpreters/Context.h>
#include <Storages/StorageSQLite.h>
#include <Databases/SQLite/SQLiteUtils.h>
namespace DB
{
namespace ErrorCodes
{
extern const int SQLITE_ENGINE_ERROR;
extern const int UNKNOWN_TABLE;
}
DatabaseSQLite::DatabaseSQLite(
ContextPtr context_,
const ASTStorage * database_engine_define_,
const String & database_path_)
: IDatabase("SQLite")
, WithContext(context_->getGlobalContext())
, database_engine_define(database_engine_define_->clone())
, log(&Poco::Logger::get("DatabaseSQLite"))
{
sqlite_db = openSQLiteDB(database_path_, context_);
}
bool DatabaseSQLite::empty() const
{
std::lock_guard<std::mutex> lock(mutex);
return fetchTablesList().empty();
}
DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &)
{
std::lock_guard<std::mutex> lock(mutex);
Tables tables;
auto table_names = fetchTablesList();
for (const auto & table_name : table_names)
tables[table_name] = fetchTable(table_name, local_context, true);
return std::make_unique<DatabaseTablesSnapshotIterator>(tables, database_name);
}
std::unordered_set<std::string> DatabaseSQLite::fetchTablesList() const
{
std::unordered_set<String> tables;
std::string query = "SELECT name FROM sqlite_master "
"WHERE type = 'table' AND name NOT LIKE 'sqlite_%'";
auto callback_get_data = [](void * res, int col_num, char ** data_by_col, char ** /* col_names */) -> int
{
for (int i = 0; i < col_num; ++i)
static_cast<std::unordered_set<std::string> *>(res)->insert(data_by_col[i]);
return 0;
};
char * err_message = nullptr;
int status = sqlite3_exec(sqlite_db.get(), query.c_str(), callback_get_data, &tables, &err_message);
if (status != SQLITE_OK)
{
String err_msg(err_message);
sqlite3_free(err_message);
throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR,
"Cannot fetch sqlite database tables. Error status: {}. Message: {}",
status, err_msg);
}
return tables;
}
bool DatabaseSQLite::checkSQLiteTable(const String & table_name) const
{
const String query = fmt::format("SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';", table_name);
auto callback_get_data = [](void * res, int, char **, char **) -> int
{
*(static_cast<int *>(res)) += 1;
return 0;
};
int count = 0;
char * err_message = nullptr;
int status = sqlite3_exec(sqlite_db.get(), query.c_str(), callback_get_data, &count, &err_message);
if (status != SQLITE_OK)
{
String err_msg(err_message);
sqlite3_free(err_message);
throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR,
"Cannot check sqlite table. Error status: {}. Message: {}",
status, err_msg);
}
return (count != 0);
}
bool DatabaseSQLite::isTableExist(const String & table_name, ContextPtr) const
{
std::lock_guard<std::mutex> lock(mutex);
return checkSQLiteTable(table_name);
}
StoragePtr DatabaseSQLite::tryGetTable(const String & table_name, ContextPtr local_context) const
{
std::lock_guard<std::mutex> lock(mutex);
return fetchTable(table_name, local_context, false);
}
StoragePtr DatabaseSQLite::fetchTable(const String & table_name, ContextPtr local_context, bool table_checked) const
{
if (!table_checked && !checkSQLiteTable(table_name))
return StoragePtr{};
auto columns = fetchSQLiteTableStructure(sqlite_db.get(), table_name);
if (!columns)
return StoragePtr{};
auto storage = StorageSQLite::create(
StorageID(database_name, table_name),
sqlite_db,
table_name,
ColumnsDescription{*columns},
ConstraintsDescription{},
local_context);
return storage;
}
ASTPtr DatabaseSQLite::getCreateDatabaseQuery() const
{
const auto & create_query = std::make_shared<ASTCreateQuery>();
create_query->database = getDatabaseName();
create_query->set(create_query->storage, database_engine_define);
return create_query;
}
ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const
{
auto storage = fetchTable(table_name, local_context, false);
if (!storage)
{
if (throw_on_error)
throw Exception(ErrorCodes::UNKNOWN_TABLE, "SQLite table {}.{} does not exist",
database_name, table_name);
return nullptr;
}
auto create_table_query = std::make_shared<ASTCreateQuery>();
auto table_storage_define = database_engine_define->clone();
create_table_query->set(create_table_query->storage, table_storage_define);
auto columns_declare_list = std::make_shared<ASTColumns>();
auto columns_expression_list = std::make_shared<ASTExpressionList>();
columns_declare_list->set(columns_declare_list->columns, columns_expression_list);
create_table_query->set(create_table_query->columns_list, columns_declare_list);
/// init create query.
auto table_id = storage->getStorageID();
create_table_query->table = table_id.table_name;
create_table_query->database = table_id.database_name;
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
{
const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
column_declaration->name = column_type_and_name.name;
column_declaration->type = getColumnDeclaration(column_type_and_name.type);
columns_expression_list->children.emplace_back(column_declaration);
}
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;
/// Add table_name to engine arguments
storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 1, std::make_shared<ASTLiteral>(table_id.table_name));
return create_table_query;
}
ASTPtr DatabaseSQLite::getColumnDeclaration(const DataTypePtr & data_type) const
{
WhichDataType which(data_type);
if (which.isNullable())
return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
return std::make_shared<ASTIdentifier>(data_type->getName());
}
}
#endif

Some files were not shown because too many files have changed in this diff Show More