Merge branch 'master' into improve_create_or_replace

This commit is contained in:
mergify[bot] 2021-08-03 11:39:07 +00:00 committed by GitHub
commit dc57254982
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
829 changed files with 10080 additions and 4950 deletions

View File

@ -19,9 +19,9 @@ Detailed description / Documentation draft:
...
By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder.
> By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder.
If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first.
> If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first.
Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/
> Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/

9
.gitmodules vendored
View File

@ -225,6 +225,15 @@
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
[submodule "contrib/libstemmer_c"]
path = contrib/libstemmer_c
url = https://github.com/ClickHouse-Extras/libstemmer_c.git
[submodule "contrib/wordnet-blast"]
path = contrib/wordnet-blast
url = https://github.com/ClickHouse-Extras/wordnet-blast.git
[submodule "contrib/lemmagen-c"]
path = contrib/lemmagen-c
url = https://github.com/ClickHouse-Extras/lemmagen-c.git
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/ClickHouse-Extras/libpqxx.git

View File

@ -542,6 +542,7 @@ include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
include (cmake/find/s2geometry.cmake)
include (cmake/find/nlp.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")

View File

@ -259,10 +259,25 @@ private:
Poco::Logger * log;
BaseDaemon & daemon;
/// Writes the message received for a terminated thread to the log at FATAL level.
/// The first line of `message` is logged together with the version, the build id
/// and the thread number; every following line is logged as a separate record.
/// NOTE(review): this listing carries two signature lines and two LOG_FATAL argument
/// lines (std::string and std::string_view variants) — confirm which pair is live.
void onTerminate(const std::string & message, UInt32 thread_num) const
void onTerminate(std::string_view message, UInt32 thread_num) const
{
/// Position of the first line break, or npos when the message has a single line.
size_t pos = message.find('\n');
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message);
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos));
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
while (pos != std::string_view::npos)
{
/// Step over the '\n' that ended the previous line.
++pos;
size_t next_pos = message.find('\n', pos);
/// When no further line break exists, size stays npos and substr takes the rest of the message.
size_t size = next_pos;
if (next_pos != std::string_view::npos)
size = next_pos - pos;
LOG_FATAL(log, "{}", message.substr(pos, size));
/// npos here ends the loop after the last line has been logged.
pos = next_pos;
}
}
void onFault(

View File

@ -4,13 +4,24 @@ QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
if [ -x ./clickhouse ]
then
CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
CLICKHOUSE_CLIENT="clickhouse-client"
else
echo "clickhouse-client is not found"
exit 1
fi
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1)
RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done

View File

@ -11,8 +11,8 @@ DATASET="${TABLE}_v1.tar.xz"
QUERIES_FILE="queries.sql"
TRIES=3
AMD64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_build_check/gcc-10_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
AARCH64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_special_build_check/clang-10-aarch64_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
AMD64_BIN_URL="https://builds.clickhouse.tech/master/amd64/clickhouse"
AARCH64_BIN_URL="https://builds.clickhouse.tech/master/aarch64/clickhouse"
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
@ -89,7 +89,7 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(./clickhouse client --max_memory_usage 100000000000 --time --format=Null --query="$query" 2>&1 ||:)
RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done

32
cmake/find/nlp.cmake Normal file
View File

@ -0,0 +1,32 @@
# Decides whether the NLP functions can be built from the bundled contrib libraries.
#
# Input:  ENABLE_NLP (option, defaults to ${ENABLE_LIBRARIES}).
# Output: USE_NLP = 1 when all three submodules are present,
#         USE_NLP = 0 when the first missing submodule is found,
#         USE_NLP is not touched when ENABLE_NLP is off.
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})

# Each submodule is probed through one file; only the first failing probe reports,
# and nothing after it is evaluated.
if (NOT ENABLE_NLP)
    message (STATUS "NLP functions disabled")
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile")
    message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled")
    set (USE_NLP 0)
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/CMakeLists.txt")
    message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled")
    set (USE_NLP 0)
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md")
    message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled")
    set (USE_NLP 0)
else ()
    # Every probe succeeded: enable the feature and report which libraries back it.
    set (USE_NLP 1)
    message (STATUS "Using Libraries for NLP functions: contrib/wordnet-blast, contrib/libstemmer_c, contrib/lemmagen-c")
endif ()

View File

@ -328,6 +328,12 @@ endif()
add_subdirectory(fast_float)
if (USE_NLP)
add_subdirectory(libstemmer-c-cmake)
add_subdirectory(wordnet-blast-cmake)
add_subdirectory(lemmagen-c-cmake)
endif()
if (USE_SQLITE)
add_subdirectory(sqlite-cmake)
endif()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 976874b7aa7f422bf4ea595bb7d1166c617b1c26
Subproject commit 0ce9490093021c63564cca159571a8b27772ad48

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 1ccbb5a522a571ce83b606dbc2e1011c42ecccfb
Subproject commit 9cf09dbfd55a5c6202dedbdf40781a51b02c2675

View File

@ -13,11 +13,12 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
regex
context
coroutine
graph
)
if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND Boost_FILESYSTEM_LIBRARY AND
Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY AND
Boost_COROUTINE_LIBRARY)
Boost_COROUTINE_LIBRARY AND Boost_GRAPH_LIBRARY)
set(EXTERNAL_BOOST_FOUND 1)
@ -32,6 +33,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
add_library (_boost_system INTERFACE)
add_library (_boost_context INTERFACE)
add_library (_boost_coroutine INTERFACE)
add_library (_boost_graph INTERFACE)
target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
@ -40,6 +42,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY})
target_link_libraries (_boost_coroutine INTERFACE ${Boost_COROUTINE_LIBRARY})
target_link_libraries (_boost_graph INTERFACE ${Boost_GRAPH_LIBRARY})
add_library (boost::filesystem ALIAS _boost_filesystem)
add_library (boost::iostreams ALIAS _boost_iostreams)
@ -48,6 +51,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
add_library (boost::system ALIAS _boost_system)
add_library (boost::context ALIAS _boost_context)
add_library (boost::coroutine ALIAS _boost_coroutine)
add_library (boost::graph ALIAS _boost_graph)
else()
set(EXTERNAL_BOOST_FOUND 0)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost")
@ -221,4 +225,17 @@ if (NOT EXTERNAL_BOOST_FOUND)
add_library (boost::coroutine ALIAS _boost_coroutine)
target_include_directories (_boost_coroutine PRIVATE ${LIBRARY_DIR})
target_link_libraries(_boost_coroutine PRIVATE _boost_context)
# graph
set (SRCS_GRAPH
"${LIBRARY_DIR}/libs/graph/src/graphml.cpp"
"${LIBRARY_DIR}/libs/graph/src/read_graphviz_new.cpp"
)
add_library (_boost_graph ${SRCS_GRAPH})
add_library (boost::graph ALIAS _boost_graph)
target_include_directories (_boost_graph PRIVATE ${LIBRARY_DIR})
target_link_libraries(_boost_graph PRIVATE _boost_regex)
endif ()

1
contrib/lemmagen-c vendored Submodule

@ -0,0 +1 @@
Subproject commit 59537bdcf57bbed17913292cb4502d15657231f1

View File

@ -0,0 +1,9 @@
# Build definition for the bundled lemmagen-c library: static target `lemmagen`.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c")
set(LEMMAGEN_INCLUDE_DIR "${LIBRARY_DIR}/include")
# The target is compiled from a single source file.
set(SRCS
"${LIBRARY_DIR}/src/RdrLemmatizer.cpp"
)
add_library(lemmagen STATIC ${SRCS})
# PUBLIC: the include directory is used for the library itself and for targets linking to it.
target_include_directories(lemmagen PUBLIC "${LEMMAGEN_INCLUDE_DIR}")

View File

@ -0,0 +1,31 @@
# Build definition for the bundled libstemmer_c library: static target `stemmer`.
# The source and header lists are not hard-coded here; they are extracted from the
# `snowball_sources` and `snowball_headers` assignments in the library's mkinc.mak.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c")
set(STEMMER_INCLUDE_DIR "${LIBRARY_DIR}/include")
FILE ( READ "${LIBRARY_DIR}/mkinc.mak" _CONTENT )
# join backslash-continued lines into one big line; every continued entry
# gets "${LIBRARY_DIR}/" put in front of it
STRING ( REGEX REPLACE "\\\\\n " " ${LIBRARY_DIR}/" _CONTENT "${_CONTENT}" )
# escape ';' (if any) so it is not taken as a list separator below
STRING ( REGEX REPLACE ";" "\\\\;" _CONTENT "${_CONTENT}" )
# now replace lf with ';' (turns the text into a list with one element per line)
STRING ( REGEX REPLACE "\n" ";" _CONTENT "${_CONTENT}" )
FOREACH ( LINE ${_CONTENT} )
# skip comments (beginning with #)
IF ( NOT "${LINE}" MATCHES "^#.*" )
# parse 'name=value1 value2...' - extract the 'name' part
STRING ( REGEX REPLACE "=.*$" "" _NAME "${LINE}" )
# extract the list of values part
STRING ( REGEX REPLACE "^.*=" "" _LIST "${LINE}" )
# replace (multi)spaces with ';' (turns the values into a list)
STRING ( REGEX REPLACE " +" ";" _LIST "${_LIST}" )
# finally keep the two variables we need; any other assignment is ignored
IF ( "${_NAME}" MATCHES "snowball_sources" )
SET ( _SOURCES "${_LIST}" )
ELSEIF ( "${_NAME}" MATCHES "snowball_headers" )
SET ( _HEADERS "${_LIST}" )
ENDIF ()
endif ()
endforeach ()
# all the sources parsed. Now just add the lib
add_library ( stemmer STATIC ${_SOURCES} ${_HEADERS} )
# PUBLIC: the include directory is used for the library itself and for targets linking to it.
target_include_directories (stemmer PUBLIC "${STEMMER_INCLUDE_DIR}")

1
contrib/libstemmer_c vendored Submodule

@ -0,0 +1 @@
Subproject commit c753054304d87daf460057c1a649c482aa094835

View File

@ -22,6 +22,7 @@ set(SRCS
"${LIBRARY_DIR}/src/launcher.cxx"
"${LIBRARY_DIR}/src/srv_config.cxx"
"${LIBRARY_DIR}/src/snapshot_sync_req.cxx"
"${LIBRARY_DIR}/src/snapshot_sync_ctx.cxx"
"${LIBRARY_DIR}/src/handle_timeout.cxx"
"${LIBRARY_DIR}/src/handle_append_entries.cxx"
"${LIBRARY_DIR}/src/cluster_config.cxx"

2
contrib/protobuf vendored

@ -1 +1 @@
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee
Subproject commit 75601841d172c73ae6bf4ce8121f42b875cdbabd

1
contrib/wordnet-blast vendored Submodule

@ -0,0 +1 @@
Subproject commit 1d16ac28036e19fe8da7ba72c16a307fbdf8c87e

View File

@ -0,0 +1,13 @@
# Build definition for the bundled wordnet-blast library: target `wnb`.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast")
# Sources compiled into the target.
set(SRCS
"${LIBRARY_DIR}/wnb/core/info_helper.cc"
"${LIBRARY_DIR}/wnb/core/load_wordnet.cc"
"${LIBRARY_DIR}/wnb/core/wordnet.cc"
)
# No STATIC/SHARED keyword is given, so the library type follows CMake's default for the build.
add_library(wnb ${SRCS})
# Boost is linked PRIVATE: it is needed to build wnb but is not passed on to its consumers.
target_link_libraries(wnb PRIVATE boost::headers_only boost::graph)
# PUBLIC: the source root doubles as the include directory for wnb and for targets linking to it.
target_include_directories(wnb PUBLIC "${LIBRARY_DIR}")

View File

@ -23,6 +23,7 @@ RUN apt-get update \
libboost-regex-dev \
libboost-context-dev \
libboost-coroutine-dev \
libboost-graph-dev \
zlib1g-dev \
liblz4-dev \
libdouble-conversion-dev \

View File

@ -61,4 +61,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
ENV TZ=Europe/Moscow
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
CMD sleep 1

View File

@ -299,6 +299,7 @@ function run_tests
01318_decrypt # Depends on OpenSSL
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01683_codec_encrypted # Depends on OpenSSL
01776_decrypt_aead_size_check # Depends on OpenSSL
01811_filter_by_null # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
@ -310,6 +311,7 @@ function run_tests
01411_bayesian_ab_testing
01798_uniq_theta_sketch
01799_long_uniq_theta_sketch
01890_stem # depends on libstemmer_c
collate
collation
_orc_

View File

@ -14,10 +14,14 @@ services:
}
EOF
./docker-entrypoint.sh'
ports:
- 9020:9019
expose:
- 9019
healthcheck:
test: ["CMD", "curl", "-s", "localhost:9019/ping"]
interval: 5s
timeout: 3s
retries: 30
volumes:
- type: ${JDBC_BRIDGE_FS:-tmpfs}
source: ${JDBC_BRIDGE_LOGS:-}
target: /app/logs

View File

@ -0,0 +1,13 @@
# Compose file that defines a single MongoDB service, `mongo1`.
version: '2.3'
services:
mongo1:
image: mongo:3.6
restart: always
# Root account created by the image on first start.
environment:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: clickhouse
# The host directory from MONGO_CONFIG_PATH is mounted at /mongo/; the command below reads its config from there.
volumes:
- ${MONGO_CONFIG_PATH}:/mongo/
# Host port and container port both come from the environment.
ports:
- ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
# Arguments for the container: config file from the mounted directory, plus profiling and verbose flags.
command: --config /mongo/mongo_secure.conf --profile=2 --verbose

View File

@ -2,7 +2,7 @@ version: '2.3'
services:
rabbitmq1:
image: rabbitmq:3-management-alpine
image: rabbitmq:3.8-management-alpine
hostname: rabbitmq1
expose:
- ${RABBITMQ_PORT}

View File

@ -2,6 +2,11 @@
set -e -x
# Choose random timezone for this test run
TZ="$(grep -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
dpkg -i package_folder/clickhouse-common-static_*.deb;
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb

View File

@ -3,6 +3,11 @@
# fail on errors, verbose and export all env variables
set -e -x -a
# Choose random timezone for this test run.
TZ="$(grep -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
@ -138,6 +143,7 @@ if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
fi
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
tar -chf /test_output/zookeeper_log_dump.tar /var/lib/clickhouse/data/system/zookeeper_log ||:
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
@ -147,6 +153,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||:
tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
fi

View File

@ -77,9 +77,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -20,6 +20,7 @@ def get_skip_list_cmd(path):
def get_options(i):
options = []
client_options = []
if 0 < i:
options.append("--order=random")
@ -27,25 +28,29 @@ def get_options(i):
options.append("--db-engine=Ordinary")
if i % 3 == 2:
options.append('''--client-option='allow_experimental_database_replicated=1' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
client_options.append('allow_experimental_database_replicated=1')
# If database name is not specified, new database is created for each functional test.
# Run some threads with one database for all tests.
if i % 2 == 1:
options.append(" --database=test_{}".format(i))
if i % 7 == 0:
options.append(" --client-option='join_use_nulls=1'")
if i % 5 == 1:
client_options.append("join_use_nulls=1")
if i % 14 == 0:
options.append(' --client-option="join_algorithm=\'partial_merge\'"')
if i % 15 == 6:
client_options.append("join_algorithm='partial_merge'")
if i % 21 == 0:
options.append(' --client-option="join_algorithm=\'auto\'"')
options.append(' --client-option="max_rows_in_join=1000"')
if i % 15 == 11:
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
if i == 13:
options.append(" --client-option='memory_tracker_fault_probability=0.00001'")
client_options.append('memory_tracker_fault_probability=0.001')
if client_options:
options.append(" --client-option " + ' '.join(client_options))
return ' '.join(options)

View File

@ -35,7 +35,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.6.90 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy
RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6

View File

@ -1,8 +1,6 @@
# docker build -t yandex/clickhouse-unit-test .
FROM yandex/clickhouse-stateless-test
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get install gdb
COPY run.sh /

View File

@ -8,7 +8,7 @@ toc_title: Third-Party Libraries Used
The list of third-party libraries can be obtained by the following query:
``` sql
SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'
SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en';
```
[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)

View File

@ -749,7 +749,7 @@ If your code in the `master` branch is not buildable yet, exclude it from the bu
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse.
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built with ClickHouse. See [Guidelines for adding new third-party libraries](contrib.md#adding-third-party-libraries) for details.
**3.** Preference is always given to libraries that are already in use.

View File

@ -70,7 +70,13 @@ Note that integration of ClickHouse with third-party drivers is not tested. Also
Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure.
Its not necessarily to have unit tests if the code is already covered by functional tests (and functional tests are usually much more simple to use).
It's not necessary to have unit tests if the code is already covered by functional tests (and functional tests are usually much simpler to use).
You can run individual gtest checks by calling the executable directly, for example:
```bash
$ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
```
## Performance Tests {#performance-tests}

View File

@ -14,7 +14,7 @@ You can also use the following database engines:
- [MySQL](../../engines/database-engines/mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)

View File

@ -1,9 +1,9 @@
---
toc_priority: 29
toc_title: MaterializeMySQL
toc_title: MaterializedMySQL
---
# MaterializeMySQL {#materialize-mysql}
# MaterializedMySQL {#materialized-mysql}
**This is an experimental feature that should not be used in production.**
@ -17,7 +17,7 @@ This feature is experimental.
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```
**Engine Parameters**
@ -36,15 +36,22 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor
- `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). A negative value disables retries. Default: `1000`.
- `allows_query_when_mysql_lost` — Allows querying a materialized table when the connection to MySQL is lost. Default: `0` (`false`).
```
CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***')
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS
allows_query_when_mysql_lost=true,
max_wait_time_when_mysql_unavailable=10000;
```
**Settings on MySQL-server side**
For `MaterializedMySQL` to work correctly, there are a few mandatory `MySQL`-side configuration settings that must be set:
- `default_authentication_plugin = mysql_native_password`, since `MaterializedMySQL` can only authorize with this method.
- `gtid_mode = on`, since GTID-based logging is mandatory for correct `MaterializedMySQL` replication. Note that when turning this mode `on`, you should also specify `enforce_gtid_consistency = on`.
## Virtual columns {#virtual-columns}
When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
- `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
@ -70,6 +77,7 @@ When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](.
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
@ -77,13 +85,21 @@ Other types are not supported. If MySQL table contains a column of such type, Cl
## Specifics and Recommendations {#specifics-and-recommendations}
### Compatibility restrictions
Apart from the data type limitations, there are a few restrictions compared to `MySQL` databases that should be resolved before replication is possible:
- Each table in `MySQL` should contain a `PRIMARY KEY`.
- Replication will not work for tables containing rows with `ENUM` field values that are out of range (as specified in the `ENUM` signature).
### DDL Queries {#ddl-queries}
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL query, the query is ignored.
### Data Replication {#data-replication}
`MaterializeMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
`MaterializedMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
- MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
@ -91,9 +107,9 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([
- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1`.
### Selecting from MaterializeMySQL Tables {#select}
### Selecting from MaterializedMySQL Tables {#select}
`SELECT` query from `MaterializeMySQL` tables has some specifics:
`SELECT` query from `MaterializedMySQL` tables has some specifics:
- If `_version` is not specified in the `SELECT` query, [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used. So only rows with `MAX(_version)` are selected.
@ -110,10 +126,10 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
**Notes**
- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes.
- `MaterializedMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL server changes.
## Examples of Use {#examples-of-use}
@ -142,7 +158,7 @@ Database in ClickHouse, exchanging data with the MySQL server:
The database and the table created:
``` sql
CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***');
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
@ -177,4 +193,4 @@ SELECT * FROM mysql.test;
└───┴─────┴──────┘
```
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialized-mysql/) <!--hide-->

View File

@ -39,4 +39,46 @@ ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Metrics
There is also a `system.rocksdb` table that exposes RocksDB statistics:
```sql
SELECT
name,
value
FROM system.rocksdb
┌─name──────────────────────┬─value─┐
│ no.file.opens │ 1 │
│ number.block.decompressed │ 1 │
└───────────────────────────┴───────┘
```
## Configuration
You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wiki/Option-String-and-Option-Map) using config:
```xml
<rocksdb>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
<tables>
<table>
<name>TABLE</name>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
</table>
</tables>
</rocksdb>
```
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/embedded-rocksdb/) <!--hide-->

View File

@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
) ENGINE = MongoDB(host:port, database, collection, user, password [, options]);
```
**Engine Parameters**
@ -30,9 +30,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
- `password` — User password.
- `options` — MongoDB connection string options (optional parameter).
## Usage Example {#usage-example}
Table in ClickHouse which allows to read data from MongoDB collection:
Create a table in ClickHouse which allows reading data from a MongoDB collection:
``` text
CREATE TABLE mongo_table
@ -42,6 +44,16 @@ CREATE TABLE mongo_table
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
To read from an SSL secured MongoDB server:
``` text
CREATE TABLE mongo_table_ssl
(
key UInt64,
data String
) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true');
```
Query:
``` sql

View File

@ -14,6 +14,8 @@ Engines of the family:
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
`Log` family table engines can store data to [HDFS](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-hdfs) or [S3](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3) distributed file systems.
## Common Properties {#common-properties}
Engines:

View File

@ -5,10 +5,8 @@ toc_title: Log
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
The engine belongs to the family of `Log` engines. See the common properties of `Log` engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
`Log` differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log/) <!--hide-->
The `Log` engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The `Log` engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.

View File

@ -728,7 +728,7 @@ During this time, they are not moved to other volumes or disks. Therefore, until
## Using S3 for Data Storage {#table_engine-mergetree-s3}
`MergeTree` family table engines is able to store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
This feature is under development and not ready for production. There are known drawbacks such as very low performance.
@ -764,11 +764,13 @@ Configuration markup:
```
Required parameters:
- `endpoint` — S3 endpoint url in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint url should contain bucket and root path to store data.
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `region` — S3 region name.
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
@ -784,7 +786,6 @@ Optional parameters:
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
S3 disk can be configured as `main` or `cold` storage:
``` xml
<storage_configuration>
@ -823,4 +824,43 @@ S3 disk can be configured as `main` or `cold` storage:
In case of the `cold` option, data can be moved to S3 if the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) <!--hide-->
## Using HDFS for Data Storage {#table_engine-mergetree-hdfs}
[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) is a distributed file system for remote data storage.
`MergeTree` family table engines can store data to HDFS using a disk with type `hdfs`.
Configuration markup:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Required parameters:
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
Optional parameters:
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.

View File

@ -43,7 +43,7 @@ toc_title: Integrations
- Monitoring
- [Graphite](https://graphiteapp.org)
- [graphouse](https://github.com/yandex/graphouse)
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
- [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
- [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied
- [Grafana](https://grafana.com/)

View File

@ -115,6 +115,7 @@ toc_title: Adopters
| <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| <a href="https://www.spark.co.nz/" class="favicon">Spark New Zealand</a> | Telecommunications | Security Operations | — | — | [Blog Post, Feb 2020](https://blog.n0p.me/2020/02/2020-02-05-dnsmonster/) |
| <a href="https://splitbee.io" class="favicon">Splitbee</a> | Analytics | Main Product | — | — | [Blog Post, May 2021](https://splitbee.io/blog/new-pricing) |
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |

View File

@ -5,50 +5,67 @@ toc_title: Testing Hardware
# How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse}
With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages.
You can run a basic ClickHouse performance test on any server without installing ClickHouse packages.
1. Go to “commits” page: https://github.com/ClickHouse/ClickHouse/commits/master
2. Click on the first green check mark or red cross with green “ClickHouse Build Check” and click on the “Details” link near “ClickHouse Build Check”. There is no such link in some commits, for example commits with documentation. In this case, choose the nearest commit having this link.
3. Copy the link to `clickhouse` binary for amd64 or aarch64.
4. ssh to the server and download it with wget:
## Automated Run
You can run the benchmark with a single script.
1. Download the script.
```
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware.sh
```
2. Run the script.
```
chmod a+x ./hardware.sh
./hardware.sh
```
3. Copy the output and send it to clickhouse-feedback@yandex-team.com
All the results are published here: https://clickhouse.tech/benchmark/hardware/
## Manual Run
Alternatively, you can perform the benchmark in the following steps.
1. ssh to the server and download the binary with wget:
```bash
# These links are outdated, please obtain the fresh link from the "commits" page.
# For amd64:
wget https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_build_check/gcc-10_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse
wget https://builds.clickhouse.tech/master/amd64/clickhouse
# For aarch64:
wget https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_special_build_check/clang-10-aarch64_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse
wget https://builds.clickhouse.tech/master/aarch64/clickhouse
# Then do:
chmod a+x clickhouse
```
5. Download benchmark files:
2. Download benchmark files:
```bash
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
```
6. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
3. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
```bash
wget https://datasets.clickhouse.tech/hits/partitions/hits_100m_obfuscated_v1.tar.xz
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
```
7. Run the server:
4. Run the server:
```bash
./clickhouse server
```
8. Check the data: ssh to the server in another terminal
5. Check the data: ssh to the server in another terminal
```bash
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
```
9. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `--max_memory_usage 100000000000` parameter.
```bash
mcedit benchmark-new.sh
```
10. Run the benchmark:
6. Run the benchmark:
```bash
./benchmark-new.sh hits_100m_obfuscated
```
11. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
7. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
All the results are published here: https://clickhouse.tech/benchmark/hardware/

View File

@ -69,6 +69,28 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). The command, or a shell script, is expected to write a Base64-encoded key of any length to the stdout.
**Example**
For Linux with systemd:
```xml
<encryption>
<key_command>/usr/bin/systemd-ask-password --id="clickhouse-server" --timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command>
</encryption>
```
For other systems:
```xml
<encryption>
<key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command>
</encryption>
```
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.

View File

@ -280,14 +280,13 @@ Default value: `0`.
## check_sample_column_is_correct {#check_sample_column_is_correct}
Enables to check column for sampling or sampling expression is correct at table creation.
Enables the check at table creation, that the data type of a column for sampling or sampling expression is correct. The data type must be one of unsigned [integer types](../../sql-reference/data-types/int-uint.md): `UInt8`, `UInt16`, `UInt32`, `UInt64`.
Possible values:
- true — Check column or sampling expression is correct at table creation.
- false — Do not check column or sampling expression is correct at table creation.
- true — The check is enabled.
- false — The check is disabled at table creation.
Default value: `true`.
By default, the ClickHouse server check column for sampling or sampling expression at table creation. If you already had tables with incorrect sampling expression, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting.
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->
By default, the ClickHouse server checks at table creation the data type of a column for sampling or sampling expression. If you already have tables with incorrect sampling expression and do not want the server to raise an exception during startup, set `check_sample_column_is_correct` to `false`.

View File

@ -28,7 +28,7 @@ Structure of the `users` section:
<profile>profile_name</profile>
<quota>default</quota>
<default_database>default</default_database>
<databases>
<database_name>
<table_name>

View File

@ -20,6 +20,29 @@ Possible values:
- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.`
- `allow` — Allows the use of these types of subqueries.
## prefer_global_in_and_join {#prefer-global-in-and-join}
Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`.
Possible values:
- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`.
- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`.
Default value: `0`.
**Usage**
Although `SET distributed_product_mode=global` can change the queries behavior for the distributed tables, it's not suitable for local tables or tables from external resources. Here is when the `prefer_global_in_and_join` setting comes into play.
For example, we have query serving nodes that contain local tables, which are not suitable for distribution. We need to scatter their data on the fly during distributed processing with the `GLOBAL` keyword — `GLOBAL IN`/`GLOBAL JOIN`.
Another use case of `prefer_global_in_and_join` is accessing tables created by external engines. This setting helps to reduce the number of calls to external sources while joining such tables: only one call per query.
**See also:**
- [Distributed subqueries](../../sql-reference/operators/in.md#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN`
## enable_optimize_predicate_expression {#enable-optimize-predicate-expression}
Turns on predicate pushdown in `SELECT` queries.
@ -2927,7 +2950,7 @@ Result:
└─────────────┘
```
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour.
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}

View File

@ -62,4 +62,3 @@ exception_code: ZOK
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->

View File

@ -51,6 +51,7 @@ Columns:
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
@ -65,6 +66,8 @@ Columns:
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution).
- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution).
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
- 1 — TCP.
- 2 — HTTP.
@ -101,55 +104,77 @@ Columns:
**Example**
``` sql
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-07-28
event_time: 2021-07-28 13:46:56
event_time_microseconds: 2021-07-28 13:46:56.719791
query_start_time: 2021-07-28 13:46:56
query_start_time_microseconds: 2021-07-28 13:46:56.704542
query_duration_ms: 14
read_rows: 8393
read_bytes: 374325
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
result_rows: 4201
result_bytes: 153024
memory_usage: 4714038
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res)
normalized_query_hash: 6666026786019643712
query_kind: Select
databases: ['system']
tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables']
columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name']
projections: []
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
address: ::ffff:127.0.0.1
port: 33452
port: 51006
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 51006
initial_query_start_time: 2021-07-28 13:46:56
initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
os_user:
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 8
client_version_patch: 0
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
revision: 54453
log_comment:
thread_ids: [5058,22097,22110,22094]
ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars']
ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: []
used_aggregate_function_combinators: []
used_database_engines: []
used_data_type_families: ['UInt64','UInt8','Nullable','String','date']
used_dictionaries: []
used_formats: []
used_functions: ['concat','notEmpty','extractAll']
used_storages: []
used_table_functions: []
```
**See Also**

View File

@ -275,9 +275,13 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell
When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.
For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cells value is not used, and it is re-requested the next time it needs to be used.
If keys are not found in the dictionary, then an update cache task is created and added to the update queue. Update queue properties can be controlled with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with the setting `allow_read_expired_keys`.
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table.
If the setting `allow_read_expired_keys` is set to 1 (the default is 0), the dictionary can support asynchronous updates. If a client requests keys and all of them are in the cache, but some of them are expired, then the dictionary will return the expired keys to the client and request them asynchronously from the source.
To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
Supported [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): MySQL, ClickHouse, executable, HTTP.
@ -289,6 +293,16 @@ Example of settings:
<cache>
<!-- The size of the cache, in number of cells. Rounded up to a power of two. -->
<size_in_cells>1000000000</size_in_cells>
<!-- Allows to read expired keys. -->
<allow_read_expired_keys>0</allow_read_expired_keys>
<!-- Max size of update queue. -->
<max_update_queue_size>100000</max_update_queue_size>
<!-- Max timeout in milliseconds for push update task into queue. -->
<update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
<!-- Max wait timeout in milliseconds for update task to complete. -->
<query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
<!-- Max threads for cache dictionary update. -->
<max_threads_for_updates>4</max_threads_for_updates>
</cache>
</layout>
```
@ -315,7 +329,7 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic
### ssd_cache {#ssd-cache}
Similar to `cache`, but stores data on SSD and index in RAM.
Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionary settings related to update queue can also be applied to SSD cache dictionaries.
``` xml
<layout>

View File

@ -85,7 +85,7 @@ hex(arg)
The function is using uppercase letters `A-F` and not using any prefixes (like `0x`) or suffixes (like `h`).
For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big endian or “human readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if leading digit is zero.
For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero.
**Example**
@ -105,7 +105,7 @@ Values of type `Date` and `DateTime` are formatted as corresponding integers (th
For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted.
Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted.
Values of floating point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
**Arguments**
@ -206,6 +206,141 @@ Result:
└──────┘
```
## bin {#bin}
Returns a string containing the argument's binary representation.
Alias: `BIN`.
**Syntax**
``` sql
bin(arg)
```
For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte even if the leading digit is zero.
**Example**
Query:
``` sql
SELECT bin(1);
```
Result:
``` text
00000001
```
Values of type `Date` and `DateTime` are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime).
For `String` and `FixedString`, all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted.
Values of floating-point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
**Arguments**
- `arg` — A value to convert to binary. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A string with the binary representation of the argument.
Type: `String`.
**Example**
Query:
``` sql
SELECT bin(toFloat32(number)) as bin_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─bin_presentation─────────────────┐
│ 00000000000000000111000001000001 │
│ 00000000000000001000000001000001 │
└──────────────────────────────────┘
```
Query:
``` sql
SELECT bin(toFloat64(number)) as bin_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─bin_presentation─────────────────────────────────────────────────┐
│ 0000000000000000000000000000000000000000000000000010111001000000 │
│ 0000000000000000000000000000000000000000000000000011000001000000 │
└──────────────────────────────────────────────────────────────────┘
```
## unbin {#unbinstr}
Performs the opposite operation of [bin](#bin). It interprets each group of eight binary digits (in the argument) as a number and converts it to the byte represented by the number. The return value is a binary string (BLOB).
If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions.
!!! note "Note"
If `unbin` is invoked from within the `clickhouse-client`, binary strings display using UTF-8.
Alias: `UNBIN`.
**Syntax**
``` sql
unbin(arg)
```
**Arguments**
- `arg` — A string containing any number of binary digits. Type: [String](../../sql-reference/data-types/string.md).
Supports binary digits `0-1`. The number of binary digits does not have to be a multiple of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn't thrown). For a numeric argument the inverse of bin(N) is not performed by unbin().
**Returned value**
- A binary string (BLOB).
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100');
```
Result:
``` text
┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐
│ 012 │ MySQL │
└───────────────────────────────────┴───────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT reinterpretAsUInt64(reverse(unbin('1010'))) AS num;
```
Result:
``` text
┌─num─┐
│ 10 │
└─────┘
```
## UUIDStringToNum(str) {#uuidstringtonumstr}
Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16).

View File

@ -0,0 +1,132 @@
---
toc_priority: 67
toc_title: NLP
---
# [experimental] Natural Language Processing functions {#nlp-functions}
!!! warning "Warning"
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
## stem {#stem}
Performs stemming on a given word.
**Syntax**
``` sql
stem('language', word)
```
**Arguments**
- `language` — Language whose rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Query:
``` sql
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```
Result:
``` text
┌─res────────────────────────────────────────────────┐
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```
## lemmatize {#lemmatize}
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
**Syntax**
``` sql
lemmatize('language', word)
```
**Arguments**
- `language` — Language whose rules will be applied. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Query:
``` sql
SELECT lemmatize('en', 'wolves');
```
Result:
``` text
┌─lemmatize("wolves")─┐
│ "wolf" │
└─────────────────────┘
```
Configuration:
``` xml
<lemmatizers>
<lemmatizer>
<lang>en</lang>
<path>en.bin</path>
</lemmatizer>
</lemmatizers>
```
## synonyms {#synonyms}
Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`.
With the `plain` extension type we need to provide a path to a simple text file, where each line corresponds to a certain synonym set. Words in this line must be separated with space or tab characters.
With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.
**Syntax**
``` sql
synonyms('extension_name', word)
```
**Arguments**
- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Query:
``` sql
SELECT synonyms('list', 'important');
```
Result:
``` text
┌─synonyms('list', 'important')────────────┐
│ ['important','big','critical','crucial'] │
└──────────────────────────────────────────┘
```
Configuration:
``` xml
<synonyms_extensions>
<extension>
<name>en</name>
<type>plain</type>
<path>en.txt</path>
</extension>
<extension>
<name>en</name>
<type>wordnet</type>
<path>en/</path>
</extension>
</synonyms_extensions>
```

View File

@ -145,6 +145,72 @@ Result:
└────────────────────────────┘
```
## splitByWhitespace(s) {#splitbywhitespaceseparator-s}
Splits a string into substrings separated by whitespace characters.
Returns an array of selected substrings.
**Syntax**
``` sql
splitByWhitespace(s)
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
**Returned value(s)**
Returns an array of selected substrings.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Example**
``` sql
SELECT splitByWhitespace(' 1! a, b. ');
```
``` text
┌─splitByWhitespace(' 1! a, b. ')─┐
│ ['1!','a,','b.'] │
└─────────────────────────────────────┘
```
## splitByNonAlpha(s) {#splitbynonalphaseparator-s}
Splits a string into substrings separated by whitespace and punctuation characters.
Returns an array of selected substrings.
**Syntax**
``` sql
splitByNonAlpha(s)
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
**Returned value(s)**
Returns an array of selected substrings.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Example**
``` sql
SELECT splitByNonAlpha(' 1! a, b. ');
```
``` text
┌─splitByNonAlpha(' 1! a, b. ')─┐
│ ['1','a','b'] │
└───────────────────────────────────┘
```
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Concatenates the strings listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.

View File

@ -13,13 +13,14 @@ toc_title: Strings
Returns 1 for an empty string or 0 for a non-empty string.
The result type is UInt8.
A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte.
The function also works for arrays.
The function also works for arrays or UUID.
UUID is empty if it is all zeros (nil UUID).
## notEmpty {#notempty}
Returns 0 for an empty string or 1 for a non-empty string.
The result type is UInt8.
The function also works for arrays.
The function also works for arrays or UUID.
## length {#length}

View File

@ -20,12 +20,11 @@ The following actions are supported:
- [ADD COLUMN](#alter_add-column) — Adds a new column to the table.
- [DROP COLUMN](#alter_drop-column) — Deletes the column.
- [RENAME COLUMN](#alter_rename-column) — Renames the column.
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column's type, default expression and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
These actions are described in detail below.
@ -35,7 +34,7 @@ These actions are described in detail below.
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```
Adds a new column to the table with the specified `name`, `type`, [`codec`](../../../sql-reference/statements/create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)).
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)).
If the `IF NOT EXISTS` clause is included, the query won't return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
@ -64,6 +63,7 @@ Added2 UInt32
ToDrop UInt32
Added3 UInt32
```
## DROP COLUMN {#alter_drop-column}
``` sql
@ -118,7 +118,7 @@ ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple()
## COMMENT COLUMN {#alter_comment-column}
``` sql
COMMENT COLUMN [IF EXISTS] name 'comment'
COMMENT COLUMN [IF EXISTS] name 'Text comment'
```
Adds a comment to the column. If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist.
@ -136,7 +136,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for
## MODIFY COLUMN {#alter_modify-column}
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```
This query changes the `name` column properties:
@ -145,8 +145,12 @@ This query changes the `name` column properties:
- Default expression
- Compression Codec
- TTL
For examples of modifying column compression codecs, see [Column Compression Codecs](../create/table.md#codecs).
For examples of modifying column TTL, see [Column TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist.
@ -179,6 +183,8 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
**Example**
Remove TTL:
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
@ -187,22 +193,6 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
- [REMOVE TTL](ttl.md).
## RENAME COLUMN {#alter_rename-column}
Renames an existing column.
Syntax:
```sql
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name
```
**Example**
```sql
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
```
## Limitations {#alter-query-limitations}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
@ -213,4 +203,4 @@ If the `ALTER` query is not sufficient to make the table changes you need, you c
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.
For tables that do not store data themselves (such as `Merge` and `Distributed`), `ALTER` just changes the table structure, and does not change the structure of subordinate tables. For example, when running ALTER for a `Distributed` table, you will also need to run `ALTER` for the tables on all remote servers.
For tables that do not store data themselves (such as [Merge](../../../sql-reference/statements/alter/index.md) and [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` just changes the table structure, and does not change the structure of subordinate tables. For example, when running ALTER for a `Distributed` table, you will also need to run `ALTER` for the tables on all remote servers.

View File

@ -0,0 +1,60 @@
---
toc_priority: 38
toc_title: SETTING
---
# Table Settings Manipulations {#table_settings_manipulations}
There is a set of queries to change table settings. You can modify settings or reset them to default values. A single query can change several settings at once.
If a setting with the specified name does not exist, then the query raises an exception.
**Syntax**
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```
!!! note "Note"
These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only.
## MODIFY SETTING {#alter_modify_setting}
Changes table settings.
**Syntax**
```sql
MODIFY SETTING setting_name=value [, ...]
```
**Example**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id;
ALTER TABLE example_table MODIFY SETTING max_part_loading_threads=8, max_parts_in_total=50000;
```
## RESET SETTING {#alter_reset_setting}
Resets table settings to their default values. If a setting is in a default state, then no action is taken.
**Syntax**
```sql
RESET SETTING setting_name [, ...]
```
**Example**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id
SETTINGS max_part_loading_threads=8;
ALTER TABLE example_table RESET SETTING max_part_loading_threads;
```
**See Also**
- [MergeTree settings](../../../operations/settings/merge-tree-settings.md)

View File

@ -254,6 +254,20 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```
### Encryption Codecs {#create-query-encryption-codecs}
These codecs don't actually compress data, but instead encrypt data on disk. These are only available when an encryption key is specified by [encryption](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption) settings. Note that encryption only makes sense at the end of codec pipelines, because encrypted data usually can't be compressed in any meaningful way.
Encryption codecs:
- `Encrypted('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode. This codec uses a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence).
!!! attention "Attention"
Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed.
!!! attention "Attention"
If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging.
## Temporary Tables {#temporary-tables}
ClickHouse supports temporary tables which have the following characteristics:

View File

@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
```

View File

@ -274,28 +274,28 @@ This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../
`WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults.
Use following syntax for filling multiple columns add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
To fill multiple columns, add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` can be applied only for fields with Numeric (all kind of float, decimal, int) or Date/DateTime types.
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings.
When `FROM const_expr` is not defined, the filling sequence uses the minimal `expr` field value from `ORDER BY`.
When `TO const_expr` is not defined, the filling sequence uses the maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type.
When `STEP const_numeric_expr` is omitted, the filling sequence uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
For example, the following query
Example of a query without `WITH FILL`:
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```
returns
Result:
``` text
┌─n─┬─source───┐
@ -305,16 +305,16 @@ returns
└───┴──────────┘
```
but after apply `WITH FILL` modifier
Same query after applying `WITH FILL` modifier:
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5;
```
returns
Result:
``` text
┌───n─┬─source───┐
@ -334,7 +334,7 @@ returns
└─────┴──────────┘
```
For the case when we have multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in `ORDER BY` clause.
For the case with multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in the `ORDER BY` clause.
Example:
@ -350,7 +350,7 @@ ORDER BY
d1 WITH FILL STEP 5;
```
returns
Result:
``` text
┌───d1───────┬───d2───────┬─source───┐
@ -364,9 +364,9 @@ returns
└────────────┴────────────┴──────────┘
```
Field `d1` does not fill and use default value cause we do not have repeated values for `d2` value, and sequence for `d1` cant be properly calculated.
Field `d1` is not filled in and uses the default value because we do not have repeated values for `d2`, and the sequence for `d1` can't be properly calculated.
The following query with a changed field in `ORDER BY`
The following query with the changed field in `ORDER BY`:
``` sql
SELECT
@ -380,7 +380,7 @@ ORDER BY
d2 WITH FILL;
```
returns
Result:
``` text
┌───d1───────┬───d2───────┬─source───┐

View File

@ -45,7 +45,7 @@ toc_title: "\u7D71\u5408"
- 監視
- [黒鉛](https://graphiteapp.org)
- [グラファウス](https://github.com/yandex/graphouse)
- [カーボンクリックハウス](https://github.com/lomik/carbon-clickhouse) +
- [カーボンクリックハウス](https://github.com/lomik/carbon-clickhouse)
- [グラファイト-クリック](https://github.com/lomik/graphite-clickhouse)
- [黒鉛-ch-オプティマイザー](https://github.com/innogames/graphite-ch-optimizer) -staled仕切りを最大限に活用する [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) からのルールの場合 [ロールアップ構成](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) 応用できます
- [グラファナ](https://grafana.com/)

View File

@ -38,3 +38,15 @@ toc_title: "Используемые сторонние библиотеки"
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |
## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений {#adding-third-party-libraries}
1. Весь внешний сторонний код должен находиться в отдельных папках внутри папки `contrib` репозитория ClickHouse. По возможности, используйте сабмодули Git.
2. Клонируйте официальный репозиторий [Clickhouse-extras](https://github.com/ClickHouse-Extras). Используйте официальные репозитории GitHub, если они доступны.
3. Создавайте новую ветку на основе той ветки, которую вы хотите интегрировать: например, `master` -> `clickhouse/master` или `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`.
4. Все копии [Clickhouse-extras](https://github.com/ClickHouse-Extras) можно автоматически синхронизировать с удаленными репозиториями. Ветки `clickhouse/...` останутся незатронутыми, поскольку скорее всего никто не будет использовать этот шаблон именования в своих репозиториях.
5. Добавьте сабмодули в папку `contrib` репозитория ClickHouse, на который ссылаются клонированные репозитории. Настройте сабмодули для отслеживания изменений в соответствующих ветках `clickhouse/...`.
6. Каждый раз, когда необходимо внести изменения в код библиотеки, следует создавать отдельную ветку, например `clickhouse/my-fix`. Затем эта ветка должна быть слита (`merge`) в ветку, отслеживаемую сабмодулем, например, в `clickhouse/master` или `clickhouse/release/vX.Y.Z`.
7. Не добавляйте код в клоны репозитория [Clickhouse-extras](https://github.com/ClickHouse-Extras), если имя ветки не соответствует шаблону `clickhouse/...`.
8. Всегда вносите изменения с учетом того, что они попадут в официальный репозиторий. После того как PR будет влит из (ветки разработки/исправлений) вашего личного клона репозитория в [Clickhouse-extras](https://github.com/ClickHouse-Extras), и сабмодуль будет добавлен в репозиторий ClickHouse, рекомендуется сделать еще один PR из (ветки разработки/исправлений) репозитория [Clickhouse-extras](https://github.com/ClickHouse-Extras) в официальный репозиторий библиотеки. Таким образом будут решены следующие задачи: 1) публикуемый код может быть использован многократно и будет иметь более высокую ценность; 2) другие пользователи также смогут использовать его в своих целях; 3) поддержкой кода будут заниматься не только разработчики ClickHouse.
9. Чтобы сабмодуль начал использовать новый код из исходной ветки (например, `master`), сначала следует аккуратно выполнить слияние (`master` -> `clickhouse/master`), и только после этого изменения могут быть добавлены в основной репозиторий ClickHouse. Это связано с тем, что в отслеживаемую ветку (например, `clickhouse/master`) могут быть внесены изменения, и поэтому ветка может отличаться от первоисточника (`master`).

View File

@ -92,7 +92,7 @@ ClickHouse не работает и не собирается на 32-битны
# Две последние команды могут быть объединены вместе:
git submodule update --init
The next commands would help you to reset all submodules to the initial state (!WARING! - any chenges inside will be deleted):
The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted):
Следующие команды помогут сбросить все сабмодули в изначальное состояние (!ВНИМАНИЕ! - все изменения в сабмодулях будут утеряны):
# Synchronizes submodules' remote URL with .gitmodules
@ -242,6 +242,8 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
Стиль кода: https://clickhouse.tech/docs/ru/development/style/
Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений: https://clickhouse.tech/docs/ru/development/contrib/#adding-third-party-libraries
Разработка тестов: https://clickhouse.tech/docs/ru/development/tests/
Список задач: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22

View File

@ -820,11 +820,11 @@ The dictionary is configured incorrectly.
**10.** Ненужный код удаляется из исходников.
## Библиотеки {#biblioteki}
## Библиотеки {#libraries}
**1.** Используются стандартная библиотека C++20 (допустимо использовать экспериментальные расширения) а также фреймворки `boost`, `Poco`.
**1.** Используются стандартные библиотеки C++20 (допустимо использовать экспериментальные расширения), а также фреймворки `boost`, `Poco`.
**2.** Библиотеки должны быть расположены в виде исходников в директории `contrib` и собираться вместе с ClickHouse. Не разрешено использовать библиотеки, доступные в пакетах ОС или любые другие способы установки библиотек в систему.
**2.** Библиотеки должны быть расположены в виде исходников в директории `contrib` и собираться вместе с ClickHouse. Не разрешено использовать библиотеки, доступные в пакетах ОС, или любые другие способы установки библиотек в систему. Подробнее смотрите раздел [Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений](contrib.md#adding-third-party-libraries).
**3.** Предпочтение отдаётся уже использующимся библиотекам.
@ -902,4 +902,3 @@ function(
const & RangesInDataParts ranges,
size_t limit)
```

View File

@ -14,7 +14,7 @@ toc_title: "Введение"
- [MySQL](../../engines/database-engines/mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)

View File

@ -1,22 +1,22 @@
---
toc_priority: 29
toc_title: MaterializeMySQL
toc_title: MaterializedMySQL
---
# MaterializeMySQL {#materialize-mysql}
# MaterializedMySQL {#materialized-mysql}
Создает базу данных ClickHouse со всеми таблицами, существующими в MySQL, и всеми данными в этих таблицах.
Сервер ClickHouse работает как реплика MySQL. Он читает файл binlog и выполняет DDL и DML-запросы.
`MaterializeMySQL` — экспериментальный движок баз данных.
`MaterializedMySQL` — экспериментальный движок баз данных.
## Создание базы данных {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```
**Параметры движка**
@ -28,7 +28,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor
## Виртуальные столбцы {#virtual-columns}
При работе с движком баз данных `MaterializeMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`.
При работе с движком баз данных `MaterializedMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`.
- `_version` — счетчик транзакций. Тип [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — метка удаления. Тип [Int8](../../sql-reference/data-types/int-uint.md). Возможные значения:
@ -75,9 +75,9 @@ DDL-запросы в MySQL конвертируются в соответств
- Запрос `UPDATE` конвертируется в ClickHouse в `INSERT` с `_sign=-1` и `INSERT` с `_sign=1`.
### Выборка из таблиц движка MaterializeMySQL {#select}
### Выборка из таблиц движка MaterializedMySQL {#select}
Запрос `SELECT` из таблиц движка `MaterializeMySQL` имеет некоторую специфику:
Запрос `SELECT` из таблиц движка `MaterializedMySQL` имеет некоторую специфику:
- Если в запросе `SELECT` напрямую не указан столбец `_version`, то используется модификатор [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираются только строки с `MAX(_version)`.
@ -94,10 +94,10 @@ DDL-запросы в MySQL конвертируются в соответств
**Примечание**
- Строки с `_sign=-1` физически не удаляются из таблиц.
- Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`.
- Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializedMySQL`.
- Репликация может быть легко нарушена.
- Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены.
- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`.
- Прямые операции изменения данных в таблицах и базах данных `MaterializedMySQL` запрещены.
- На работу `MaterializedMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответствующей таблице в базе данных `MaterializedMySQL`.
## Примеры использования {#examples-of-use}
@ -126,7 +126,7 @@ mysql> SELECT * FROM test;
База данных и созданная таблица:
``` sql
CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***');
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```

View File

@ -14,6 +14,8 @@ toc_priority: 29
- [Log](log.md)
- [TinyLog](tinylog.md)
Табличные движки семейства `Log` могут хранить данные в распределенных файловых системах [HDFS](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-hdfs) или [S3](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3).
## Общие свойства {#obshchie-svoistva}
Движки:

View File

@ -5,9 +5,8 @@ toc_title: Log
# Log {#log}
Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](index.md).
Отличается от [TinyLog](tinylog.md) тем, что вместе с файлами столбцов лежит небольшой файл «засечек». Засечки пишутся на каждый блок данных и содержат смещение - с какого места нужно читать файл, чтобы пропустить заданное количество строк. Это позволяет читать данные из таблицы в несколько потоков.
При конкурентном доступе к данным, чтения могут выполняться одновременно, а записи блокируют чтения и друг друга.
Движок Log не поддерживает индексы. Также, если при записи в таблицу произошёл сбой, то таблица станет битой, и чтения из неё будут возвращать ошибку. Движок Log подходит для временных данных, write-once таблиц, а также для тестовых и демонстрационных целей.
Движок относится к семейству движков `Log`. Смотрите общие свойства и различия движков в статье [Семейство Log](../../../engines/table-engines/log-family/index.md).
Отличается от [TinyLog](../../../engines/table-engines/log-family/tinylog.md) тем, что вместе с файлами столбцов лежит небольшой файл "засечек". Засечки пишутся на каждый блок данных и содержат смещение: с какого места нужно читать файл, чтобы пропустить заданное количество строк. Это позволяет читать данные из таблицы в несколько потоков.
При конкурентном доступе к данным чтения могут выполняться одновременно, а записи блокируют чтения и друг друга.
Движок `Log` не поддерживает индексы. Также, если при записи в таблицу произошёл сбой, то таблица станет битой, и чтения из нее будут возвращать ошибку. Движок `Log` подходит для временных данных, write-once таблиц, а также для тестовых и демонстрационных целей.

View File

@ -771,7 +771,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
- `cache_path` — путь в локальной файловой системе, где будут храниться кэш засечек и файлы индекса. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — признак, выполнять ли проверку доступов при запуске диска. Если установлено значение `true`, то проверка не выполняется. Значение по умолчанию: `false`.
Диск S3 может быть сконфигурирован как `main` или `cold`:
``` xml
@ -810,3 +809,44 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`.
## Использование сервиса HDFS для хранения данных {#table_engine-mergetree-hdfs}
[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) — это распределенная файловая система для удаленного хранения данных.
Таблицы семейства `MergeTree` могут хранить данные в сервисе HDFS при использовании диска типа `HDFS`.
Пример конфигурации:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Обязательные параметры:
- `endpoint` — URL точки приема запроса на стороне HDFS в формате `path`. URL точки должен содержать путь к корневой директории на сервере, где хранятся данные.
Необязательные параметры:
- `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.

View File

@ -43,7 +43,7 @@ toc_title: "Библиотеки для интеграции от сторонн
- Мониторинг
- [Graphite](https://graphiteapp.org)
- [graphouse](https://github.com/yandex/graphouse)
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
- [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
- [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - оптимизирует партиции таблиц [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) согласно правилам в [конфигурации rollup](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration)
- [Grafana](https://grafana.com/)

View File

@ -277,4 +277,15 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: `0`.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->
## check_sample_column_is_correct {#check_sample_column_is_correct}
Разрешает проверку того, что тип данных столбца для сэмплирования или выражения сэмплирования при создании таблицы верный. Тип данных должен соответствовать одному из беззнаковых [целочисленных типов](../../sql-reference/data-types/int-uint.md): `UInt8`, `UInt16`, `UInt32`, `UInt64`.
Возможные значения:
- true — проверка включена.
- false — проверка при создании таблицы не проводится.
Значение по умолчанию: `true`.
По умолчанию сервер ClickHouse при создании таблицы проверяет тип данных столбца для сэмплирования или выражения сэмплирования. Если уже существуют таблицы с некорректным выражением сэмплирования, то чтобы не возникало исключение при запуске сервера, установите `check_sample_column_is_correct` в значение `false`.

View File

@ -25,6 +25,30 @@ ClickHouse применяет настройку в тех случаях, ко
- `global` — заменяет запрос `IN`/`JOIN` на `GLOBAL IN`/`GLOBAL JOIN.`
- `allow` — разрешает использование таких подзапросов.
## prefer_global_in_and_join {#prefer-global-in-and-join}
Заменяет запрос `IN`/`JOIN` на `GLOBAL IN`/`GLOBAL JOIN`.
Возможные значения:
- 0 — выключена. Операторы `IN`/`JOIN` не заменяются на `GLOBAL IN`/`GLOBAL JOIN`.
- 1 — включена. Операторы `IN`/`JOIN` заменяются на `GLOBAL IN`/`GLOBAL JOIN`.
Значение по умолчанию: `0`.
**Использование**
Настройка `SET distributed_product_mode=global` меняет поведение запросов для распределенных таблиц, но она не подходит для локальных таблиц или таблиц из внешних источников. В этих случаях удобно использовать настройку `prefer_global_in_and_join`.
Например, если нужно объединить все данные из локальных таблиц, которые находятся на разных узлах — для распределенной обработки необходим `GLOBAL JOIN`.
Другой вариант использования настройки `prefer_global_in_and_join` — регулирование обращений к таблицам из внешних источников.
Эта настройка помогает уменьшить количество обращений к внешним ресурсам при объединении внешних таблиц: только один вызов на весь распределенный запрос.
**См. также:**
- [Распределенные подзапросы](../../sql-reference/operators/in.md#select-distributed-subqueries) `GLOBAL IN`/`GLOBAL JOIN`
## enable_optimize_predicate_expression {#enable-optimize-predicate-expression}
Включает пробрасывание предикатов в подзапросы для запросов `SELECT`.
@ -2777,7 +2801,7 @@ SELECT * FROM test2;
└─────────────┘
```
Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md).
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}

View File

@ -35,4 +35,3 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
- [system.events](#system_tables-events) — таблица с количеством произошедших событий.
- [system.metric_log](#system_tables-metric_log) — таблица, фиксирующая историю значений метрик из `system.metrics` и `system.events`.

View File

@ -61,4 +61,3 @@ exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```

View File

@ -51,6 +51,7 @@ ClickHouse не удаляет данные из таблица автомати
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена баз данных, присутствующих в запросе.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена таблиц, присутствующих в запросе.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена столбцов, присутствующих в запросе.
- `projections` ([String](../../sql-reference/data-types/string.md)) — имена проекций, использованных при выполнении запроса.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения.
- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения, если запрос завершился по исключению.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [stack trace](https://en.wikipedia.org/wiki/Stack_trace). Пустая строка, если запрос успешно завершен.
@ -65,6 +66,8 @@ ClickHouse не удаляет данные из таблица автомати
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID родительского запроса.
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел родительский запрос.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт, с которого клиент сделал родительский запрос.
- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса (для распределенных запросов).
- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд (для распределенных запросов).
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — интерфейс, с которого ушёл запрос. Возможные значения:
- 1 — TCP.
- 2 — HTTP.
@ -101,55 +104,77 @@ ClickHouse не удаляет данные из таблица автомати
**Пример**
``` sql
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-07-28
event_time: 2021-07-28 13:46:56
event_time_microseconds: 2021-07-28 13:46:56.719791
query_start_time: 2021-07-28 13:46:56
query_start_time_microseconds: 2021-07-28 13:46:56.704542
query_duration_ms: 14
read_rows: 8393
read_bytes: 374325
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
result_rows: 4201
result_bytes: 153024
memory_usage: 4714038
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res)
normalized_query_hash: 6666026786019643712
query_kind: Select
databases: ['system']
tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables']
columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name']
projections: []
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
address: ::ffff:127.0.0.1
port: 33452
port: 51006
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 51006
initial_query_start_time: 2021-07-28 13:46:56
initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
os_user:
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 8
client_version_patch: 0
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
revision: 54453
log_comment:
thread_ids: [5058,22097,22110,22094]
ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars']
ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: []
used_aggregate_function_combinators: []
used_database_engines: []
used_data_type_families: ['UInt64','UInt8','Nullable','String','date']
used_dictionaries: []
used_formats: []
used_functions: ['concat','notEmpty','extractAll']
used_storages: []
used_table_functions: []
```
**Смотрите также**

View File

@ -0,0 +1,132 @@
---
toc_priority: 67
toc_title: NLP
---
# [экспериментально] Функции для работы с естественным языком {#nlp-functions}
!!! warning "Предупреждение"
Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
## stem {#stem}
Данная функция проводит стемминг заданного слова.
**Синтаксис**
``` sql
stem('language', word)
```
**Аргументы**
- `language` — Язык, правила которого будут применены для стемминга. Допускается только нижний регистр. [String](../../sql-reference/data-types/string.md#string).
- `word` — Слово, подлежащее стеммингу. Допускается только нижний регистр. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
Запрос:
``` sql
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```
Результат:
``` text
┌─res────────────────────────────────────────────────┐
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```
## lemmatize {#lemmatize}
Данная функция проводит лемматизацию для заданного слова. Для работы лемматизатора необходимы словари, которые можно найти [здесь](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
**Синтаксис**
``` sql
lemmatize('language', word)
```
**Аргументы**
- `language` — Язык, правила которого будут применены для лемматизации. [String](../../sql-reference/data-types/string.md#string).
- `word` — Слово, подлежащее лемматизации. Допускается только нижний регистр. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
Запрос:
``` sql
SELECT lemmatize('en', 'wolves');
```
Результат:
``` text
┌─lemmatize("wolves")─┐
│ "wolf" │
└─────────────────────┘
```
Конфигурация:
``` xml
<lemmatizers>
<lemmatizer>
<lang>en</lang>
<path>en.bin</path>
</lemmatizer>
</lemmatizers>
```
## synonyms {#synonyms}
Находит синонимы к заданному слову. Представлены два типа расширений словарей: `plain` и `wordnet`.
Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соответствует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
Для работы расширения типа `wordnet` необходимо указать путь до WordNet тезауруса. Тезаурус должен содержать WordNet sense index.
**Синтаксис**
``` sql
synonyms('extension_name', word)
```
**Аргументы**
- `extension_name` — Название расширения, в котором будет проводиться поиск. [String](../../sql-reference/data-types/string.md#string).
- `word` — Слово, которое будет искаться в расширении. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
Запрос:
``` sql
SELECT synonyms('list', 'important');
```
Результат:
``` text
┌─synonyms('list', 'important')────────────┐
│ ['important','big','critical','crucial'] │
└──────────────────────────────────────────┘
```
Конфигурация:
``` xml
<synonyms_extensions>
<extension>
<name>en</name>
<type>plain</type>
<path>en.txt</path>
</extension>
<extension>
<name>en</name>
<type>wordnet</type>
<path>en/</path>
</extension>
</synonyms_extensions>
```

View File

@ -146,6 +146,70 @@ SELECT splitByRegexp('', 'abcde');
└────────────────────────────┘
```
## splitByWhitespace(s) {#splitbywhitespaceseparator-s}
Разбивает строку на подстроки, используя в качестве разделителей пробельные символы.
**Синтаксис**
``` sql
splitByWhitespace(s)
```
**Аргументы**
- `s` — разбиваемая строка. [String](../../sql-reference/data-types/string.md).
**Возвращаемые значения**
Возвращает массив подстрок.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Пример**
``` sql
SELECT splitByWhitespace(' 1! a, b. ');
```
``` text
┌─splitByWhitespace(' 1! a, b. ')─┐
│ ['1!','a,','b.'] │
└─────────────────────────────────────┘
```
## splitByNonAlpha(s) {#splitbynonalphaseparator-s}
Разбивает строку на подстроки, используя в качестве разделителей пробельные символы и символы пунктуации.
**Синтаксис**
``` sql
splitByNonAlpha(s)
```
**Аргументы**
- `s` — разбиваемая строка. [String](../../sql-reference/data-types/string.md).
**Возвращаемые значения**
Возвращает массив подстрок.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Пример**
``` sql
SELECT splitByNonAlpha(' 1! a, b. ');
```
``` text
┌─splitByNonAlpha(' 1! a, b. ')─┐
│ ['1','a','b'] │
└───────────────────────────────────┘
```
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Склеивает строки, перечисленные в массиве, с разделителем separator.

View File

@ -5,15 +5,26 @@ toc_title: "Манипуляции со столбцами"
# Манипуляции со столбцами {#manipuliatsii-so-stolbtsami}
Набор действий, позволяющих изменять структуру таблицы.
Синтаксис:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ...
```
В запросе можно указать сразу несколько действий над одной таблицей через запятую.
Каждое действие — это манипуляция над столбцом.
Существуют следующие действия:
- [ADD COLUMN](#alter_add-column) — добавляет столбец в таблицу;
- [DROP COLUMN](#alter_drop-column) — удаляет столбец;
- [RENAME COLUMN](#alter_rename-column) — переименовывает существующий столбец.
- [CLEAR COLUMN](#alter_clear-column) — сбрасывает все значения в столбце для заданной партиции;
- [COMMENT COLUMN](#alter_comment-column) — добавляет комментарий к столбцу;
- [MODIFY COLUMN](#alter_modify-column) — изменяет тип столбца, выражение для значения по умолчанию и TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — удаляет какое-либо из свойств столбца.
- [RENAME COLUMN](#alter_rename-column) — переименовывает существующий столбец.
Подробное описание для каждого действия приведено ниже.
@ -72,6 +83,22 @@ DROP COLUMN [IF EXISTS] name
ALTER TABLE visits DROP COLUMN browser
```
## RENAME COLUMN {#alter_rename-column}
``` sql
RENAME COLUMN [IF EXISTS] name to new_name
```
Переименовывает столбец `name` в `new_name`. Если указано выражение `IF EXISTS`, то запрос не будет возвращать ошибку при условии, что столбец `name` не существует. Поскольку переименование не затрагивает физические данные колонки, запрос выполняется практически мгновенно.
**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти столбцы приведет к `SQL Error [524]`.
Пример:
``` sql
ALTER TABLE visits RENAME COLUMN webBrowser TO browser
```
## CLEAR COLUMN {#alter_clear-column}
``` sql
@ -109,7 +136,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'Столбец показывает,
## MODIFY COLUMN {#alter_modify-column}
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```
Запрос изменяет следующие свойства столбца `name`:
@ -118,11 +145,15 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | F
- Значение по умолчанию
- Кодеки сжатия
- TTL
Примеры изменения TTL столбца смотрите в разделе [TTL столбца](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
Примеры изменения кодеков сжатия смотрите в разделе [Кодеки сжатия столбцов](../create/table.md#codecs).
Если указано `IF EXISTS`, запрос не возвращает ошибку, если столбца не существует.
Примеры изменения TTL столбца смотрите в разделе [TTL столбца](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
Если указано `IF EXISTS`, запрос не возвращает ошибку при условии, что столбец не существует.
Запрос также может изменять порядок столбцов при помощи `FIRST | AFTER`, смотрите описание [ADD COLUMN](#alter_add-column).
@ -162,22 +193,6 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
- [REMOVE TTL](ttl.md).
## RENAME COLUMN {#alter_rename-column}
Переименовывает существующий столбец.
Синтаксис:
```sql
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name
```
**Пример**
```sql
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
```
## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter}
Запрос `ALTER` позволяет создавать и удалять отдельные элементы (столбцы) вложенных структур данных, но не вложенные структуры данных целиком. Для добавления вложенной структуры данных, вы можете добавить столбцы с именем вида `name.nested_name` и типом `Array(T)` - вложенная структура данных полностью эквивалентна нескольким столбцам-массивам с именем, имеющим одинаковый префикс до точки.
@ -186,7 +201,6 @@ ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../misc.md#misc_operations-rename), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md).
Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть, если на момент запроса `ALTER`, выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время, все новые запросы к той же таблице, будут ждать, пока завершится этот `ALTER`.
Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`.
Для таблиц, которые не хранят данные самостоятельно (типа [Merge](../../../sql-reference/statements/alter/index.md) и [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` всего лишь меняет структуру таблицы, но не меняет структуру подчинённых таблиц. Для примера, при ALTER-е таблицы типа `Distributed`, вам также потребуется выполнить запрос `ALTER` для таблиц на всех удалённых серверах.

View File

@ -0,0 +1,60 @@
---
toc_priority: 38
toc_title: SETTING
---
# Изменение настроек таблицы {#table_settings_manipulations}
Существуют запросы, которые изменяют настройки таблицы или сбрасывают их в значения по умолчанию. В одном запросе можно изменить сразу несколько настроек.
Если настройка с указанным именем не существует, то генерируется исключение.
**Синтаксис**
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```
!!! note "Примечание"
Эти запросы могут применяться только к таблицам на движке [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).
## MODIFY SETTING {#alter_modify_setting}
Изменяет настройки таблицы.
**Синтаксис**
```sql
MODIFY SETTING setting_name=value [, ...]
```
**Пример**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id;
ALTER TABLE example_table MODIFY SETTING max_part_loading_threads=8, max_parts_in_total=50000;
```
## RESET SETTING {#alter_reset_setting}
Сбрасывает настройки таблицы в значения по умолчанию. Если настройка уже находится в состоянии по умолчанию, то никакие действия не выполняются.
**Синтаксис**
```sql
RESET SETTING setting_name [, ...]
```
**Пример**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id
SETTINGS max_part_loading_threads=8;
ALTER TABLE example_table RESET SETTING max_part_loading_threads;
```
**Смотрите также**
- [Настройки MergeTree таблиц](../../../operations/settings/merge-tree-settings.md)

View File

@ -62,7 +62,7 @@ CREATE TABLE example (
materialized_value UInt32 MATERIALIZED 12345,
aliased_value UInt32 ALIAS 2,
PRIMARY KEY primary_key
) ENGINE=MergeTree 
) ENGINE=MergeTree
PARTITION BY partition_key
ORDER BY (primary_key, secondary_key);
```

View File

@ -271,8 +271,8 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
Этот модификатор также может быть скомбинирован с модификатором [LIMIT ... WITH TIES](../../../sql-reference/statements/select/limit.md#limit-with-ties)
`WITH FILL` модификатор может быть установлен после `ORDER BY expr` с опциональными параметрами `FROM expr`, `TO expr` и `STEP expr`.
Все пропущенные значнеия для колонки `expr` будут заполненые значениями соответсвующими предполагаемой последовательности значений колонки, другие колонки будут заполнены значенями по умолчанию.
Модификатор `WITH FILL` может быть установлен после `ORDER BY expr` с опциональными параметрами `FROM expr`, `TO expr` и `STEP expr`.
Все пропущенные значения для колонки `expr` будут заполнены значениями, соответствующими предполагаемой последовательности значений колонки, другие колонки будут заполнены значениями по умолчанию.
Используйте следующую конструкцию для заполнения нескольких колонок с модификатором `WITH FILL` с необязательными параметрами после каждого имени поля в секции `ORDER BY`.
@ -280,22 +280,22 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` может быть применене только к полям с числовыми (все разновидности float, int, decimal) или временными (все разновидности Date, DateTime) типами.
`WITH FILL` может быть применен к полям с числовыми (все разновидности float, int, decimal) или временными (все разновидности Date, DateTime) типами. В случае применения к полям типа `String` недостающие значения заполняются пустой строкой.
Когда не определен `FROM const_expr`, последовательность заполнения использует минимальное значение поля `expr` из `ORDER BY`.
Когда не определен `TO const_expr`, последовательность заполнения использует максимальное значение поля `expr` из `ORDER BY`.
Когда `STEP const_numeric_expr` определен, тогда `const_numeric_expr` интерпретируется `как есть` для числовых типов, как `дни` для типа Date и как `секунды` для типа DateTime.
Когда `STEP const_numeric_expr` определен, `const_numeric_expr` интерпретируется "как есть" для числовых типов, как "дни" для типа `Date` и как "секунды" для типа `DateTime`.
Когда `STEP const_numeric_expr` не указан, тогда используется `1.0` для числовых типов, `1 день` для типа Date и `1 секунда` для типа DateTime.
Для примера, следующий запрос
Пример запроса без использования `WITH FILL`:
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```
возвращает
Результат:
```text
┌─n─┬─source───┐
│ 1 │ original │
@ -304,7 +304,7 @@ SELECT n, source FROM (
└───┴──────────┘
```
но после применения модификатора `WITH FILL`
Тот же запрос после применения модификатора `WITH FILL`:
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
@ -312,7 +312,8 @@ SELECT n, source FROM (
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```
возвращает
Результат:
```text
┌───n─┬─source───┐
│ 0 │ │
@ -331,7 +332,7 @@ SELECT n, source FROM (
└─────┴──────────┘
```
Для случая когда у нас есть несколько полей `ORDER BY field2 WITH FILL, field1 WITH FILL` порядок заполнения будет следовать порядку полей в секции `ORDER BY`.
Для случая с несколькими полями `ORDER BY field2 WITH FILL, field1 WITH FILL` порядок заполнения будет соответствовать порядку полей в секции `ORDER BY`.
Пример:
```sql
@ -346,7 +347,7 @@ ORDER BY
d1 WITH FILL STEP 5;
```
возвращает
Результат:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
@ -359,9 +360,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```
Поле `d1` не заполняет и используется значение по умолчанию поскольку у нас нет повторяющихся значения для `d2` поэтому мы не можем правильно рассчитать последователность заполнения для`d1`.
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последовательность заполнения для `d1`.
едующий запрос (с измененым порядком в ORDER BY)
Следующий запрос (с измененным порядком в ORDER BY):
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
@ -374,7 +375,7 @@ ORDER BY
d2 WITH FILL;
```
возвращает
Результат:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │

View File

@ -43,7 +43,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- Monitoring
- [Graphite](https://graphiteapp.org)
- [graphouse](https://github.com/yandex/graphouse)
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
- [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
- [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
- [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied
- [Grafana](https://grafana.com/)

View File

@ -6,4 +6,3 @@ toc_priority: 104
选择遇到的最后一个值。
其结果和[any](../../../sql-reference/aggregate-functions/reference/any.md) 函数一样是不确定的 。

View File

@ -11,7 +11,6 @@ set (CLICKHOUSE_COPIER_LINK
clickhouse_functions
clickhouse_table_functions
clickhouse_aggregate_functions
clickhouse_dictionaries
string_utils
PUBLIC

View File

@ -33,7 +33,7 @@ static std::string extractFromConfig(
{
DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml));
zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>(
*bootstrap_configuration, "zookeeper");
*bootstrap_configuration, "zookeeper", nullptr);
zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; });
config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache);
}

View File

@ -181,15 +181,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
else if (method == "loadIds")
{
params.read(request.getStream());
String ids_string;
readString(ids_string, request.getStream());
std::vector<uint64_t> ids = parseIdsFromBinary(ids_string);
if (!params.has("ids"))
{
processError(response, "No 'ids' in request URL");
return;
}
std::vector<uint64_t> ids = parseIdsFromBinary(params.get("ids"));
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
auto input = library_handler->loadIds(ids);

View File

@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
@ -17,7 +18,7 @@ SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dic
if (library_handler != library_handlers.end())
return library_handler->second;
return nullptr;
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found dictionary with id: {}", dictionary_id);
}

View File

@ -6,7 +6,6 @@ set (CLICKHOUSE_LOCAL_LINK
clickhouse_aggregate_functions
clickhouse_common_config
clickhouse_common_io
clickhouse_dictionaries
clickhouse_functions
clickhouse_parsers
clickhouse_storages_system

View File

@ -322,7 +322,7 @@ struct Checker
{
checkRequiredInstructions();
}
} checker;
} checker __attribute__((init_priority(101))); /// Run before other static initializers.
}

View File

@ -13,7 +13,6 @@ set (CLICKHOUSE_SERVER_LINK
clickhouse_common_config
clickhouse_common_io
clickhouse_common_zookeeper
clickhouse_dictionaries
clickhouse_functions
clickhouse_parsers
clickhouse_storages_system

View File

@ -26,6 +26,7 @@
#include <Common/DNSResolver.h>
#include <Common/CurrentMetrics.h>
#include <Common/Macros.h>
#include <Common/ShellCommand.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
@ -39,6 +40,7 @@
#include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
@ -95,6 +97,9 @@
#endif
#if USE_SSL
# if USE_INTERNAL_SSL_LIBRARY
# include <Compression/CompressionCodecEncrypted.h>
# endif
# include <Poco/Net/Context.h>
# include <Poco/Net/SecureServerSocket.h>
#endif
@ -107,6 +112,10 @@
# include <Server/KeeperTCPHandlerFactory.h>
#endif
#if USE_BASE64
# include <turbob64.h>
#endif
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
@ -242,6 +251,7 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INCORRECT_DATA;
extern const int INVALID_CONFIG_PARAMETER;
extern const int SYSTEM_ERROR;
extern const int FAILED_TO_GETPWUID;
@ -445,6 +455,39 @@ void checkForUsersNotInMainConfig(
}
}
/// Obtains the master encryption key by running `key_command` and installs it
/// for the encrypted compression codec.
///
/// The command's output is read until EOF, all whitespace is removed from it and
/// the remainder is decoded as Base64. Throws INCORRECT_DATA if decoding yields
/// no bytes; a decoded key shorter than 16 bytes only produces a warning and is
/// still installed.
///
/// When the server is built without Base64 or (internal) SSL support, the command
/// is not executed and only a warning is logged.
static void loadEncryptionKey(const std::string & key_command [[maybe_unused]], Poco::Logger * log)
{
#if USE_BASE64 && USE_SSL && USE_INTERNAL_SSL_LIBRARY
    auto key_process = ShellCommand::execute(key_command);

    std::string encoded_key;
    readStringUntilEOF(encoded_key, key_process->out);
    key_process->wait();

    /// turbob64 rejects whitespace in its input, so drop every blank,
    /// tab and line-break character before handing the text to the decoder.
    const auto is_whitespace = [](char ch)
    {
        switch (ch)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return true;
            default:
                return false;
        }
    };
    std::erase_if(encoded_key, is_whitespace);

    /// The decoded key is never longer than its Base64 text, so a buffer of
    /// the encoded size is large enough.
    std::vector<char> decoded(encoded_key.size());
    const auto * src = reinterpret_cast<const unsigned char *>(encoded_key.data());
    auto * dst = reinterpret_cast<unsigned char *>(decoded.data());
    const size_t decoded_size = tb64dec(src, encoded_key.size(), dst);

    if (decoded_size == 0)
        throw Exception("Failed to decode encryption key", ErrorCodes::INCORRECT_DATA);

    if (decoded_size < 16)
    {
        LOG_WARNING(log, "The encryption key should be at least 16 octets long.");
    }

    CompressionCodecEncrypted::setMasterKey(std::string_view(decoded.data(), decoded_size));
#else
    LOG_WARNING(log, "Server was built without Base64 or SSL support. Encryption is disabled.");
#endif
}
[[noreturn]] void forceShutdown()
{
@ -916,6 +959,10 @@ if (ThreadFuzzer::instance().isEffective())
global_context->getMergeTreeSettings().sanityCheck(settings);
global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);
/// Set up encryption.
if (config().has("encryption.key_command"))
loadEncryptionKey(config().getString("encryption.key_command"), log);
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
@ -1047,6 +1094,7 @@ if (ThreadFuzzer::instance().isEffective())
loadMetadataSystem(global_context);
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
auto & database_catalog = DatabaseCatalog::instance();
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);

View File

@ -1002,6 +1002,16 @@
</compression>
-->
<!-- Configuration of encryption. If a key command is defined, the
server executes it at startup to obtain the encryption key;
otherwise encryption codecs are disabled. The command is
executed through /bin/sh and is expected to write a
Base64-encoded key to stdout. -->
<encryption>
<!-- <key_command>/usr/bin/systemd-ask-password &#45;&#45;id="clickhouse-server" &#45;&#45;timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command> -->
<!-- <key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command> -->
</encryption>
<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->
<distributed_ddl>
@ -1156,4 +1166,27 @@
<!-- Uncomment to disable ClickHouse internal DNS caching. -->
<!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> -->
<!-- You can also configure rocksdb like this: -->
<!--
<rocksdb>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
<tables>
<table>
<name>TABLE</name>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
</table>
</tables>
</rocksdb>
-->
</yandex>

View File

@ -11,7 +11,7 @@ bool User::equal(const IAccessEntity & other) const
const auto & other_user = typeid_cast<const User &>(other);
return (authentication == other_user.authentication) && (allowed_client_hosts == other_user.allowed_client_hosts)
&& (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles)
&& (settings == other_user.settings) && (grantees == other_user.grantees);
&& (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database);
}
}

View File

@ -22,6 +22,7 @@ struct User : public IAccessEntity
RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{};
SettingsProfileElements settings;
RolesOrUsersSet grantees = RolesOrUsersSet::AllTag{};
String default_database;
bool equal(const IAccessEntity & other) const override;
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<User>(); }

View File

@ -196,6 +196,9 @@ namespace
user->access.revokeGrantOption(AccessType::ALL);
}
String default_database = config.getString(user_config + ".default_database", "");
user->default_database = default_database;
return user;
}

View File

@ -43,6 +43,7 @@ SRCS(
SettingsProfile.cpp
SettingsProfileElement.cpp
SettingsProfilesCache.cpp
SettingsProfilesInfo.cpp
User.cpp
UsersConfigAccessStorage.cpp
tests/gtest_access_rights_ops.cpp

View File

@ -43,9 +43,9 @@ public:
const AggregateFunctionPtr & nested_function,
const AggregateFunctionProperties &,
const DataTypes & arguments,
const Array &) const override
const Array & params) const override
{
return std::make_shared<AggregateFunctionArray>(nested_function, arguments);
return std::make_shared<AggregateFunctionArray>(nested_function, arguments, params);
}
};

View File

@ -29,10 +29,11 @@ private:
size_t num_arguments;
public:
AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments)
: IAggregateFunctionHelper<AggregateFunctionArray>(arguments, {})
AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_)
: IAggregateFunctionHelper<AggregateFunctionArray>(arguments, params_)
, nested_func(nested_), num_arguments(arguments.size())
{
assert(parameters == nested_func->getParameters());
for (const auto & type : arguments)
if (!isArray(type))
throw Exception("All arguments for aggregate function " + getName() + " must be arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -34,14 +34,14 @@ public:
const AggregateFunctionPtr & nested_function,
const AggregateFunctionProperties &,
const DataTypes & arguments,
const Array &) const override
const Array & params) const override
{
AggregateFunctionPtr res;
if (arguments.size() == 1)
{
res.reset(createWithNumericType<
AggregateFunctionDistinct,
AggregateFunctionDistinctSingleNumericData>(*arguments[0], nested_function, arguments));
AggregateFunctionDistinctSingleNumericData>(*arguments[0], nested_function, arguments, params));
if (res)
return res;
@ -49,14 +49,14 @@ public:
if (arguments[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return std::make_shared<
AggregateFunctionDistinct<
AggregateFunctionDistinctSingleGenericData<true>>>(nested_function, arguments);
AggregateFunctionDistinctSingleGenericData<true>>>(nested_function, arguments, params);
else
return std::make_shared<
AggregateFunctionDistinct<
AggregateFunctionDistinctSingleGenericData<false>>>(nested_function, arguments);
AggregateFunctionDistinctSingleGenericData<false>>>(nested_function, arguments, params);
}
return std::make_shared<AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>>(nested_function, arguments);
return std::make_shared<AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>>(nested_function, arguments, params);
}
};

Some files were not shown because too many files have changed in this diff Show More