Merge branch 'master' into improve_create_or_replace
Commit: dc57254982

.github/PULL_REQUEST_TEMPLATE.md (vendored, 6 changes)
@@ -19,9 +19,9 @@ Detailed description / Documentation draft:
 ...
 
-By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder.
+> By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder.
 
-If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first.
+> If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first.
 
-Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/
+> Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/
.gitmodules (vendored, 9 changes)
@@ -225,6 +225,15 @@
 [submodule "contrib/yaml-cpp"]
 	path = contrib/yaml-cpp
 	url = https://github.com/ClickHouse-Extras/yaml-cpp.git
+[submodule "contrib/libstemmer_c"]
+	path = contrib/libstemmer_c
+	url = https://github.com/ClickHouse-Extras/libstemmer_c.git
+[submodule "contrib/wordnet-blast"]
+	path = contrib/wordnet-blast
+	url = https://github.com/ClickHouse-Extras/wordnet-blast.git
+[submodule "contrib/lemmagen-c"]
+	path = contrib/lemmagen-c
+	url = https://github.com/ClickHouse-Extras/lemmagen-c.git
 [submodule "contrib/libpqxx"]
 	path = contrib/libpqxx
 	url = https://github.com/ClickHouse-Extras/libpqxx.git
@@ -542,6 +542,7 @@ include (cmake/find/libpqxx.cmake)
 include (cmake/find/nuraft.cmake)
 include (cmake/find/yaml-cpp.cmake)
 include (cmake/find/s2geometry.cmake)
+include (cmake/find/nlp.cmake)
 
 if(NOT USE_INTERNAL_PARQUET_LIBRARY)
     set (ENABLE_ORC OFF CACHE INTERNAL "")
@@ -259,10 +259,25 @@ private:
     Poco::Logger * log;
     BaseDaemon & daemon;
 
-    void onTerminate(const std::string & message, UInt32 thread_num) const
+    void onTerminate(std::string_view message, UInt32 thread_num) const
     {
+        size_t pos = message.find('\n');
+
         LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}",
-            VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message);
+            VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos));
+
+        /// Print trace from std::terminate exception line-by-line to make it easy for grep.
+        while (pos != std::string_view::npos)
+        {
+            ++pos;
+            size_t next_pos = message.find('\n', pos);
+            size_t size = next_pos;
+            if (next_pos != std::string_view::npos)
+                size = next_pos - pos;
+
+            LOG_FATAL(log, "{}", message.substr(pos, size));
+            pos = next_pos;
+        }
     }
 
     void onFault(
@@ -4,13 +4,24 @@ QUERIES_FILE="queries.sql"
 TABLE=$1
 TRIES=3
 
+if [ -x ./clickhouse ]
+then
+    CLICKHOUSE_CLIENT="./clickhouse client"
+elif command -v clickhouse-client >/dev/null 2>&1
+then
+    CLICKHOUSE_CLIENT="clickhouse-client"
+else
+    echo "clickhouse-client is not found"
+    exit 1
+fi
+
 cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
     sync
     echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
 
     echo -n "["
     for i in $(seq 1 $TRIES); do
-        RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1)
+        RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1)
         [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
         [[ "$i" != $TRIES ]] && echo -n ", "
     done
@@ -11,8 +11,8 @@ DATASET="${TABLE}_v1.tar.xz"
 QUERIES_FILE="queries.sql"
 TRIES=3
 
-AMD64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_build_check/gcc-10_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
-AARCH64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_special_build_check/clang-10-aarch64_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
+AMD64_BIN_URL="https://builds.clickhouse.tech/master/amd64/clickhouse"
+AARCH64_BIN_URL="https://builds.clickhouse.tech/master/aarch64/clickhouse"
 
 # Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
 
@@ -89,7 +89,7 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
 
     echo -n "["
     for i in $(seq 1 $TRIES); do
-        RES=$(./clickhouse client --max_memory_usage 100000000000 --time --format=Null --query="$query" 2>&1 ||:)
+        RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:)
         [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
         [[ "$i" != $TRIES ]] && echo -n ", "
     done
cmake/find/nlp.cmake (new file, 32 lines)
@@ -0,0 +1,32 @@
+option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
+
+if (NOT ENABLE_NLP)
+    message (STATUS "NLP functions disabled")
+    return()
+endif()
+
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile")
+    message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init --recursive")
+    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled")
+    set (USE_NLP 0)
+    return()
+endif ()
+
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/CMakeLists.txt")
+    message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init --recursive")
+    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled")
+    set (USE_NLP 0)
+    return()
+endif ()
+
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md")
+    message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init --recursive")
+    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled")
+    set (USE_NLP 0)
+    return()
+endif ()
+
+set (USE_NLP 1)
+
+message (STATUS "Using Libraries for NLP functions: contrib/wordnet-blast, contrib/libstemmer_c, contrib/lemmagen-c")
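For reference: with `USE_NLP` set, these submodules back ClickHouse's NLP functions. A minimal SQL sketch of what they enable; only `stem` is confirmed by the `01890_stem` test referenced later in this diff, and the expected output is an assumption:

```sql
-- stem() applies the Snowball stemmer from libstemmer_c.
SELECT stem('en', 'considering');
-- Expected to return the English stem, e.g. 'consid'.
```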
contrib/CMakeLists.txt (vendored, 6 changes)
@@ -328,6 +328,12 @@ endif()
 
 add_subdirectory(fast_float)
 
+if (USE_NLP)
+    add_subdirectory(libstemmer-c-cmake)
+    add_subdirectory(wordnet-blast-cmake)
+    add_subdirectory(lemmagen-c-cmake)
+endif()
+
 if (USE_SQLITE)
     add_subdirectory(sqlite-cmake)
 endif()
contrib/NuRaft (vendored submodule, 2 changes)

@@ -1 +1 @@
-Subproject commit 976874b7aa7f422bf4ea595bb7d1166c617b1c26
+Subproject commit 0ce9490093021c63564cca159571a8b27772ad48
contrib/boost (vendored submodule, 2 changes)

@@ -1 +1 @@
-Subproject commit 1ccbb5a522a571ce83b606dbc2e1011c42ecccfb
+Subproject commit 9cf09dbfd55a5c6202dedbdf40781a51b02c2675
@@ -13,11 +13,12 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
         regex
         context
         coroutine
+        graph
     )
 
     if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND Boost_FILESYSTEM_LIBRARY AND
        Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY AND
-       Boost_COROUTINE_LIBRARY)
+       Boost_COROUTINE_LIBRARY AND Boost_GRAPH_LIBRARY)
 
         set(EXTERNAL_BOOST_FOUND 1)
 
@@ -32,6 +33,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
     add_library (_boost_system INTERFACE)
     add_library (_boost_context INTERFACE)
     add_library (_boost_coroutine INTERFACE)
+    add_library (_boost_graph INTERFACE)
 
     target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
     target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
@@ -40,6 +42,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
     target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
     target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY})
    target_link_libraries (_boost_coroutine INTERFACE ${Boost_COROUTINE_LIBRARY})
+    target_link_libraries (_boost_graph INTERFACE ${Boost_GRAPH_LIBRARY})
 
     add_library (boost::filesystem ALIAS _boost_filesystem)
     add_library (boost::iostreams ALIAS _boost_iostreams)
@@ -48,6 +51,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
     add_library (boost::system ALIAS _boost_system)
     add_library (boost::context ALIAS _boost_context)
     add_library (boost::coroutine ALIAS _boost_coroutine)
+    add_library (boost::graph ALIAS _boost_graph)
 else()
     set(EXTERNAL_BOOST_FOUND 0)
     message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost")
@@ -221,4 +225,17 @@ if (NOT EXTERNAL_BOOST_FOUND)
     add_library (boost::coroutine ALIAS _boost_coroutine)
     target_include_directories (_boost_coroutine PRIVATE ${LIBRARY_DIR})
     target_link_libraries(_boost_coroutine PRIVATE _boost_context)
+
+    # graph
+
+    set (SRCS_GRAPH
+        "${LIBRARY_DIR}/libs/graph/src/graphml.cpp"
+        "${LIBRARY_DIR}/libs/graph/src/read_graphviz_new.cpp"
+    )
+
+    add_library (_boost_graph ${SRCS_GRAPH})
+    add_library (boost::graph ALIAS _boost_graph)
+    target_include_directories (_boost_graph PRIVATE ${LIBRARY_DIR})
+    target_link_libraries(_boost_graph PRIVATE _boost_regex)
+
 endif ()
contrib/lemmagen-c (new vendored submodule)

@@ -0,0 +1 @@
+Subproject commit 59537bdcf57bbed17913292cb4502d15657231f1
contrib/lemmagen-c-cmake/CMakeLists.txt (new file, 9 lines)

@@ -0,0 +1,9 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c")
+set(LEMMAGEN_INCLUDE_DIR "${LIBRARY_DIR}/include")
+
+set(SRCS
+    "${LIBRARY_DIR}/src/RdrLemmatizer.cpp"
+)
+
+add_library(lemmagen STATIC ${SRCS})
+target_include_directories(lemmagen PUBLIC "${LEMMAGEN_INCLUDE_DIR}")
contrib/libstemmer-c-cmake/CMakeLists.txt (new file, 31 lines)

@@ -0,0 +1,31 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c")
+set(STEMMER_INCLUDE_DIR "${LIBRARY_DIR}/include")
+
+FILE ( READ "${LIBRARY_DIR}/mkinc.mak" _CONTENT )
+# join lines continued with '\' into one long line
+STRING ( REGEX REPLACE "\\\\\n " " ${LIBRARY_DIR}/" _CONTENT "${_CONTENT}" )
+# escape ';' (if any)
+STRING ( REGEX REPLACE ";" "\\\\;" _CONTENT "${_CONTENT}" )
+# now replace LF with ';' (this turns the content into a list of lines)
+STRING ( REGEX REPLACE "\n" ";" _CONTENT "${_CONTENT}" )
+FOREACH ( LINE ${_CONTENT} )
+    # skip comments (beginning with #)
+    IF ( NOT "${LINE}" MATCHES "^#.*" )
+        # parse 'name=value1 value2...' - extract the 'name' part
+        STRING ( REGEX REPLACE "=.*$" "" _NAME "${LINE}" )
+        # extract the list of values part
+        STRING ( REGEX REPLACE "^.*=" "" _LIST "${LINE}" )
+        # replace (multi)spaces with ';' (this turns the values into a list)
+        STRING ( REGEX REPLACE " +" ";" _LIST "${_LIST}" )
+        # finally get our two variables
+        IF ( "${_NAME}" MATCHES "snowball_sources" )
+            SET ( _SOURCES "${_LIST}" )
+        ELSEIF ( "${_NAME}" MATCHES "snowball_headers" )
+            SET ( _HEADERS "${_LIST}" )
+        ENDIF ()
+    endif ()
+endforeach ()
+
+# all the sources parsed. Now just add the lib
+add_library ( stemmer STATIC ${_SOURCES} ${_HEADERS} )
+target_include_directories (stemmer PUBLIC "${STEMMER_INCLUDE_DIR}")
contrib/libstemmer_c (new vendored submodule)

@@ -0,0 +1 @@
+Subproject commit c753054304d87daf460057c1a649c482aa094835
@@ -22,6 +22,7 @@ set(SRCS
     "${LIBRARY_DIR}/src/launcher.cxx"
     "${LIBRARY_DIR}/src/srv_config.cxx"
     "${LIBRARY_DIR}/src/snapshot_sync_req.cxx"
+    "${LIBRARY_DIR}/src/snapshot_sync_ctx.cxx"
     "${LIBRARY_DIR}/src/handle_timeout.cxx"
     "${LIBRARY_DIR}/src/handle_append_entries.cxx"
     "${LIBRARY_DIR}/src/cluster_config.cxx"
contrib/protobuf (vendored submodule, 2 changes)

@@ -1 +1 @@
-Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee
+Subproject commit 75601841d172c73ae6bf4ce8121f42b875cdbabd
contrib/wordnet-blast (new vendored submodule)

@@ -0,0 +1 @@
+Subproject commit 1d16ac28036e19fe8da7ba72c16a307fbdf8c87e
contrib/wordnet-blast-cmake/CMakeLists.txt (new file, 13 lines)

@@ -0,0 +1,13 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast")
+
+set(SRCS
+    "${LIBRARY_DIR}/wnb/core/info_helper.cc"
+    "${LIBRARY_DIR}/wnb/core/load_wordnet.cc"
+    "${LIBRARY_DIR}/wnb/core/wordnet.cc"
+)
+
+add_library(wnb ${SRCS})
+
+target_link_libraries(wnb PRIVATE boost::headers_only boost::graph)
+
+target_include_directories(wnb PUBLIC "${LIBRARY_DIR}")
@@ -23,6 +23,7 @@ RUN apt-get update \
     libboost-regex-dev \
     libboost-context-dev \
     libboost-coroutine-dev \
+    libboost-graph-dev \
     zlib1g-dev \
     liblz4-dev \
     libdouble-conversion-dev \
@@ -61,4 +61,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7'
 ENV UBSAN_OPTIONS='print_stacktrace=1'
 ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
 
+ENV TZ=Europe/Moscow
+RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
+
 CMD sleep 1
@@ -299,6 +299,7 @@ function run_tests
         01318_decrypt # Depends on OpenSSL
         01663_aes_msan # Depends on OpenSSL
         01667_aes_args_check # Depends on OpenSSL
+        01683_codec_encrypted # Depends on OpenSSL
         01776_decrypt_aead_size_check # Depends on OpenSSL
         01811_filter_by_null # Depends on OpenSSL
         01281_unsucceeded_insert_select_queries_counter
@@ -310,6 +311,7 @@ function run_tests
         01411_bayesian_ab_testing
         01798_uniq_theta_sketch
         01799_long_uniq_theta_sketch
+        01890_stem # depends on libstemmer_c
         collate
         collation
         _orc_
@@ -14,10 +14,14 @@ services:
             }
             EOF
             ./docker-entrypoint.sh'
+        ports:
+            - 9020:9019
         expose:
             - 9019
+        healthcheck:
+            test: ["CMD", "curl", "-s", "localhost:9019/ping"]
+            interval: 5s
+            timeout: 3s
+            retries: 30
         volumes:
             - type: ${JDBC_BRIDGE_FS:-tmpfs}
               source: ${JDBC_BRIDGE_LOGS:-}
              target: /app/logs
@@ -0,0 +1,13 @@
+version: '2.3'
+services:
+    mongo1:
+        image: mongo:3.6
+        restart: always
+        environment:
+            MONGO_INITDB_ROOT_USERNAME: root
+            MONGO_INITDB_ROOT_PASSWORD: clickhouse
+        volumes:
+            - ${MONGO_CONFIG_PATH}:/mongo/
+        ports:
+            - ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
+        command: --config /mongo/mongo_secure.conf --profile=2 --verbose
@@ -2,7 +2,7 @@ version: '2.3'
 
 services:
     rabbitmq1:
-        image: rabbitmq:3-management-alpine
+        image: rabbitmq:3.8-management-alpine
         hostname: rabbitmq1
         expose:
             - ${RABBITMQ_PORT}
@@ -2,6 +2,11 @@
 
 set -e -x
 
+# Choose random timezone for this test run
+TZ="$(grep -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
+echo "Chosen random timezone $TZ"
+ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
+
 dpkg -i package_folder/clickhouse-common-static_*.deb;
 dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
 dpkg -i package_folder/clickhouse-server_*.deb
@@ -3,6 +3,11 @@
 # fail on errors, verbose and export all env variables
 set -e -x -a
 
+# Choose random timezone for this test run.
+TZ="$(grep -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
+echo "Chosen random timezone $TZ"
+ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
+
 dpkg -i package_folder/clickhouse-common-static_*.deb
 dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
 dpkg -i package_folder/clickhouse-server_*.deb
@@ -138,6 +143,7 @@ if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
 fi
 tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
 tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
+tar -chf /test_output/zookeeper_log_dump.tar /var/lib/clickhouse/data/system/zookeeper_log ||:
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
 
 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
@@ -147,6 +153,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
     pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
     mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
     mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
+    tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||:
+    tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||:
     tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
     tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
 fi
@@ -77,9 +77,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
     && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
     && rm -rf /tmp/clickhouse-odbc-tmp
 
-ENV TZ=Europe/Moscow
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-
 COPY run.sh /
 CMD ["/bin/bash", "/run.sh"]
@@ -20,6 +20,7 @@ def get_skip_list_cmd(path):
 
 def get_options(i):
     options = []
+    client_options = []
     if 0 < i:
         options.append("--order=random")
 
@@ -27,25 +28,29 @@ def get_options(i):
         options.append("--db-engine=Ordinary")
 
     if i % 3 == 2:
-        options.append('''--client-option='allow_experimental_database_replicated=1' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
+        options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
+        client_options.append('allow_experimental_database_replicated=1')
 
     # If database name is not specified, new database is created for each functional test.
     # Run some threads with one database for all tests.
     if i % 2 == 1:
         options.append(" --database=test_{}".format(i))
 
-    if i % 7 == 0:
-        options.append(" --client-option='join_use_nulls=1'")
+    if i % 5 == 1:
+        client_options.append("join_use_nulls=1")
 
-    if i % 14 == 0:
-        options.append(' --client-option="join_algorithm=\'partial_merge\'"')
+    if i % 15 == 6:
+        client_options.append("join_algorithm='partial_merge'")
 
-    if i % 21 == 0:
-        options.append(' --client-option="join_algorithm=\'auto\'"')
-        options.append(' --client-option="max_rows_in_join=1000"')
+    if i % 15 == 11:
+        client_options.append("join_algorithm='auto'")
+        client_options.append('max_rows_in_join=1000')
 
     if i == 13:
-        options.append(" --client-option='memory_tracker_fault_probability=0.00001'")
+        client_options.append('memory_tracker_fault_probability=0.001')
 
+    if client_options:
+        options.append(" --client-option " + ' '.join(client_options))
+
     return ' '.join(options)
 
@@ -35,7 +35,7 @@ RUN apt-get update \
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
-RUN pip3 install urllib3 testflows==1.6.90 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy
+RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy
 
 ENV DOCKER_CHANNEL stable
 ENV DOCKER_VERSION 20.10.6
@@ -1,8 +1,6 @@
 # docker build -t yandex/clickhouse-unit-test .
 FROM yandex/clickhouse-stateless-test
 
-ENV TZ=Europe/Moscow
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 RUN apt-get install gdb
 
 COPY run.sh /
@@ -8,7 +8,7 @@ toc_title: Third-Party Libraries Used
 The list of third-party libraries can be obtained by the following query:
 
 ``` sql
-SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'
+SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en';
 ```
 
 [Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)
@@ -749,7 +749,7 @@ If your code in the `master` branch is not buildable yet, exclude it from the bu
 
 **1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
 
-**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse.
+**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse. See [Guidelines for adding new third-party libraries](contrib.md#adding-third-party-libraries) for details.
 
 **3.** Preference is always given to libraries that are already in use.
 
@@ -70,7 +70,13 @@ Note that integration of ClickHouse with third-party drivers is not tested. Also
 
 Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure.
 
-It’s not necessarily to have unit tests if the code is already covered by functional tests (and functional tests are usually much more simple to use).
+It’s not necessary to have unit tests if the code is already covered by functional tests (and functional tests are usually much simpler to use).
+
+You can run individual gtest checks by calling the executable directly, for example:
+
+```bash
+$ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
+```
 
 ## Performance Tests {#performance-tests}
 
@@ -14,7 +14,7 @@ You can also use the following database engines:
 
 - [MySQL](../../engines/database-engines/mysql.md)
 
-- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
+- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
 
 - [Lazy](../../engines/database-engines/lazy.md)
 
@@ -1,9 +1,9 @@
 ---
 toc_priority: 29
-toc_title: MaterializeMySQL
+toc_title: MaterializedMySQL
 ---
 
-# MaterializeMySQL {#materialize-mysql}
+# MaterializedMySQL {#materialized-mysql}
 
 **This is an experimental feature that should not be used in production.**
 
@@ -17,7 +17,7 @@ This feature is experimental.
 
 ``` sql
 CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
-ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
+ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
 ```
 
 **Engine Parameters**
@@ -36,15 +36,22 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor
 - `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (in milliseconds). A negative value disables retries. Default: `1000`.
 - `allows_query_when_mysql_lost` — Allows querying a materialized table when the MySQL connection is lost. Default: `0` (`false`).
 ```
-CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***')
+CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
 SETTINGS
     allows_query_when_mysql_lost=true,
     max_wait_time_when_mysql_unavailable=10000;
 ```
 
 **Settings on MySQL-server side**
 
 For `MaterializeMySQL` to work correctly, there are a few mandatory MySQL-side configuration settings that must be set:
 
 - `default_authentication_plugin = mysql_native_password`, since `MaterializeMySQL` can only authorize with this method.
 - `gtid_mode = on`, since GTID-based logging is mandatory for correct `MaterializeMySQL` replication. Note that while turning this mode `ON` you should also specify `enforce_gtid_consistency = on`.
 
 ## Virtual columns {#virtual-columns}
 
-When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
+When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
 
 - `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md).
 - `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
@@ -70,6 +77,7 @@ When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](.
 | STRING | [String](../../sql-reference/data-types/string.md) |
 | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
 | BLOB | [String](../../sql-reference/data-types/string.md) |
+| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
 
 Other types are not supported. If a MySQL table contains a column of such a type, ClickHouse throws the exception "Unhandled data type" and stops replication.
 
@@ -77,13 +85,21 @@ Other types are not supported. If MySQL table contains a column of such type, Cl
 
 ## Specifics and Recommendations {#specifics-and-recommendations}
 
+### Compatibility restrictions
+
+Apart from the data type limitations, there are a few more restrictions compared to `MySQL` databases that must be resolved before replication is possible:
+
+- Each table in `MySQL` should contain `PRIMARY KEY`.
+
+- Replication for tables containing rows with `ENUM` field values out of range (as specified in the `ENUM` signature) will not work.
+
 ### DDL Queries {#ddl-queries}
 
 MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL query, the query is ignored.
 
 ### Data Replication {#data-replication}
 
-`MaterializeMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
+`MaterializedMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
 
 - MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
 
@@ -91,9 +107,9 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([
 
 - MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1`.
 
-### Selecting from MaterializeMySQL Tables {#select}
+### Selecting from MaterializedMySQL Tables {#select}
 
-`SELECT` query from `MaterializeMySQL` tables has some specifics:
+A `SELECT` query from `MaterializedMySQL` tables has some specifics:
 
 - If `_version` is not specified in the `SELECT` query, the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used, so only rows with `MAX(_version)` are selected.
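For illustration, a minimal sketch of how the implicit FINAL interacts with the virtual columns; the database and table names here are hypothetical:

```sql
-- Hypothetical MaterializedMySQL database "mysql" with table "test".
-- Without _version in the column list, FINAL is applied implicitly,
-- so only the latest version of each row is returned:
SELECT key, value FROM mysql.test;

-- Selecting _version (or _sign) disables the implicit FINAL and
-- exposes every replicated row version:
SELECT key, value, _sign, _version FROM mysql.test;
```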
@@ -110,10 +126,10 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
 **Notes**
 
 - Rows with `_sign=-1` are not deleted physically from the tables.
-- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
+- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine.
 - Replication can be easily broken.
 - Manual operations on database and tables are forbidden.
-- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes.
+- `MaterializedMySQL` is influenced by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL server changes.
 
 ## Examples of Use {#examples-of-use}
 
@@ -142,7 +158,7 @@ Database in ClickHouse, exchanging data with the MySQL server:
 The database and the table created:
 
 ``` sql
-CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***');
+CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
 SHOW TABLES FROM mysql;
 ```
 
@@ -177,4 +193,4 @@ SELECT * FROM mysql.test;
 └───┴─────┴──────┘
 ```
 
-[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialized-mysql/) <!--hide-->
@@ -39,4 +39,46 @@ ENGINE = EmbeddedRocksDB
 PRIMARY KEY key
 ```
 
+## Metrics
+
+There is also a `system.rocksdb` table that exposes RocksDB statistics:
+
+```sql
+SELECT
+    name,
+    value
+FROM system.rocksdb
+
+┌─name──────────────────────┬─value─┐
+│ no.file.opens             │     1 │
+│ number.block.decompressed │     1 │
+└───────────────────────────┴───────┘
+```
+
+## Configuration
+
+You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wiki/Option-String-and-Option-Map) using config:
+
+```xml
+<rocksdb>
+    <options>
+        <max_background_jobs>8</max_background_jobs>
+    </options>
+    <column_family_options>
+        <num_levels>2</num_levels>
+    </column_family_options>
+    <tables>
+        <table>
+            <name>TABLE</name>
+            <options>
+                <max_background_jobs>8</max_background_jobs>
+            </options>
+            <column_family_options>
+                <num_levels>2</num_levels>
+            </column_family_options>
+        </table>
+    </tables>
+</rocksdb>
+```
+
 [Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/embedded-rocksdb/) <!--hide-->
@@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
     name1 [type1],
     name2 [type2],
     ...
-) ENGINE = MongoDB(host:port, database, collection, user, password);
+) ENGINE = MongoDB(host:port, database, collection, user, password [, options]);
 ```
 
 **Engine Parameters**
@@ -30,9 +30,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 - `password` — User password.
 
+- `options` — MongoDB connection string options (optional parameter).
+
 ## Usage Example {#usage-example}
 
-Table in ClickHouse which allows to read data from MongoDB collection:
+Create a table in ClickHouse that allows reading data from a MongoDB collection:
 
 ``` text
 CREATE TABLE mongo_table
@@ -42,6 +44,16 @@ CREATE TABLE mongo_table
 ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
 ```
 
+To read from an SSL secured MongoDB server:
+
+``` text
+CREATE TABLE mongo_table_ssl
+(
+    key UInt64,
+    data String
+) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true');
+```
+
 Query:
 
 ``` sql
@@ -14,6 +14,8 @@ Engines of the family:
 - [Log](../../../engines/table-engines/log-family/log.md)
 - [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
 
+`Log` family table engines can store data to [HDFS](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-hdfs) or [S3](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3) distributed file systems.
+
 ## Common Properties {#common-properties}
 
 Engines:
 
@@ -5,10 +5,8 @@ toc_title: Log
 
 # Log {#log}
 
-Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
+The engine belongs to the family of `Log` engines. See the common properties of `Log` engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
 
-Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
+`Log` differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
 For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
-The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
-
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log/) <!--hide-->
+The `Log` engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The `Log` engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
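A minimal usage sketch for the `Log` engine described above; the table name and values are illustrative:

```sql
CREATE TABLE log_example
(
    id UInt64,
    message String
)
ENGINE = Log;

INSERT INTO log_example VALUES (1, 'first'), (2, 'second');

-- Reads may run in multiple threads thanks to the marks file;
-- concurrent writes block reads and each other.
SELECT * FROM log_example;
```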
@@ -728,7 +728,7 @@ During this time, they are not moved to other volumes or disks. Therefore, until
 
 ## Using S3 for Data Storage {#table_engine-mergetree-s3}
 
-`MergeTree` family table engines is able to store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
+`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
 
 This feature is under development and not ready for production. There are known drawbacks such as very low performance.
 
@@ -764,11 +764,13 @@ Configuration markup:
 ```
 
 Required parameters:
-- `endpoint` — S3 endpoint url in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint url should contain bucket and root path to store data.
+
+- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
 - `access_key_id` — S3 access key id.
 - `secret_access_key` — S3 secret access key.
 
 Optional parameters:
+
 - `region` — S3 region name.
 - `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
 - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
@@ -784,7 +786,6 @@ Optional parameters:
 - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
 - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
 
-
 S3 disk can be configured as `main` or `cold` storage:
 ``` xml
 <storage_configuration>
@@ -823,4 +824,43 @@ S3 disk can be configured as `main` or `cold` storage:
 
 In case of the `cold` option, data can be moved to S3 if the local disk free space is smaller than `move_factor * disk_size`, or by a TTL move rule.
 
-[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) <!--hide-->
+## Using HDFS for Data Storage {#table_engine-mergetree-hdfs}
+
+[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) is a distributed file system for remote data storage.
+
+`MergeTree` family table engines can store data to HDFS using a disk with type `HDFS`.
+
+Configuration markup:
+``` xml
+<yandex>
+    <storage_configuration>
+        <disks>
+            <hdfs>
+                <type>hdfs</type>
+                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
+            </hdfs>
+        </disks>
+        <policies>
+            <hdfs>
+                <volumes>
+                    <main>
+                        <disk>hdfs</disk>
+                    </main>
+                </volumes>
+            </hdfs>
+        </policies>
+    </storage_configuration>
+
+    <merge_tree>
+        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+    </merge_tree>
+</yandex>
+```
+
+Required parameters:
+
+- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
+
+Optional parameters:
+
+- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
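To tie the markup above to table creation, a hedged sketch assuming the `hdfs` storage policy defined in the configuration; the table name is illustrative:

```sql
-- Places the table's data parts on the HDFS-backed volume
-- via the storage policy declared in the server config.
CREATE TABLE hdfs_backed
(
    id UInt64,
    data String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS storage_policy = 'hdfs';
```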
@@ -43,7 +43,7 @@ toc_title: Integrations
 - Monitoring
     - [Graphite](https://graphiteapp.org)
         - [graphouse](https://github.com/yandex/graphouse)
-        - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
+        - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
        - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
        - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes stale partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied
    - [Grafana](https://grafana.com/)
@@ -115,6 +115,7 @@ toc_title: Adopters
 | <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
 | <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
 | <a href="https://www.spark.co.nz/" class="favicon">Spark New Zealand</a> | Telecommunications | Security Operations | — | — | [Blog Post, Feb 2020](https://blog.n0p.me/2020/02/2020-02-05-dnsmonster/) |
+| <a href="https://splitbee.io" class="favicon">Splitbee</a> | Analytics | Main Product | — | — | [Blog Post, May 2021](https://splitbee.io/blog/new-pricing) |
 | <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
 | <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
 | <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
@@ -5,50 +5,67 @@ toc_title: Testing Hardware
 
 # How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse}
 
-With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages.
+You can run a basic ClickHouse performance test on any server without installing ClickHouse packages.
 
-1. Go to “commits” page: https://github.com/ClickHouse/ClickHouse/commits/master
-2. Click on the first green check mark or red cross with green “ClickHouse Build Check” and click on the “Details” link near “ClickHouse Build Check”. There is no such link in some commits, for example commits with documentation. In this case, choose the nearest commit having this link.
-3. Copy the link to `clickhouse` binary for amd64 or aarch64.
-4. ssh to the server and download it with wget:
+## Automated Run
+
+You can run the benchmark with a single script.
+
+1. Download the script.
+```
+wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware.sh
+```
+
+2. Run the script.
+```
+chmod a+x ./hardware.sh
+./hardware.sh
+```
+
+3. Copy the output and send it to clickhouse-feedback@yandex-team.com
+
+All the results are published here: https://clickhouse.tech/benchmark/hardware/
+
+## Manual Run
+
+Alternatively you can perform the benchmark with the following steps.
+
+1. ssh to the server and download the binary with wget:
 ```bash
-# These links are outdated, please obtain the fresh link from the "commits" page.
 # For amd64:
-wget https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_build_check/gcc-10_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse
+wget https://builds.clickhouse.tech/master/amd64/clickhouse
 # For aarch64:
-wget https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_special_build_check/clang-10-aarch64_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse
+wget https://builds.clickhouse.tech/master/aarch64/clickhouse
 # Then do:
 chmod a+x clickhouse
 ```
-5. Download benchmark files:
+2. Download benchmark files:
 ```bash
 wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
 chmod a+x benchmark-new.sh
 wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
 ```
-6. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
+3. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
 ```bash
 wget https://datasets.clickhouse.tech/hits/partitions/hits_100m_obfuscated_v1.tar.xz
 tar xvf hits_100m_obfuscated_v1.tar.xz -C .
 mv hits_100m_obfuscated_v1/* .
 ```
-7. Run the server:
+4. Run the server:
 ```bash
 ./clickhouse server
 ```
-8. Check the data: ssh to the server in another terminal
+5. Check the data: ssh to the server in another terminal
 ```bash
 ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
 100000000
 ```
-9. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `--max_memory_usage 100000000000` parameter.
-```bash
-mcedit benchmark-new.sh
-```
-10. Run the benchmark:
+6. Run the benchmark:
 ```bash
 ./benchmark-new.sh hits_100m_obfuscated
 ```
-11. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
+7. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
 
 All the results are published here: https://clickhouse.tech/benchmark/hardware/
@@ -69,6 +69,28 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
 </compression>
 ```
 
+## encryption {#server-settings-encryption}
+
+Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). The command, or a shell script, is expected to write a Base64-encoded key of any length to stdout.
+
+**Example**
+
+For Linux with systemd:
+
+```xml
+<encryption>
+    <key_command>/usr/bin/systemd-ask-password --id="clickhouse-server" --timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command>
+</encryption>
+```
+
+For other systems:
+
+```xml
+<encryption>
+    <key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command>
+</encryption>
+```
+
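Once the server obtains the key, it is consumed by encryption codecs in table definitions. A hedged sketch: the codec name `AES_128_GCM_SIV` is assumed from the encryption codecs page this section links to, and the table is hypothetical:

```sql
-- Assumed codec name; the column data is encrypted on disk with
-- the key produced by the configured key_command.
CREATE TABLE sensitive_data
(
    id UInt64,
    payload String CODEC(AES_128_GCM_SIV)
)
ENGINE = MergeTree
ORDER BY id;
```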
 ## custom_settings_prefixes {#custom_settings_prefixes}
 
 List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
@@ -280,14 +280,13 @@ Default value: `0`.
 
 ## check_sample_column_is_correct {#check_sample_column_is_correct}
 
-Enables to check column for sampling or sampling expression is correct at table creation.
+Enables a check at table creation that the data type of the sampling column or sampling expression is correct. The data type must be one of the unsigned [integer types](../../sql-reference/data-types/int-uint.md): `UInt8`, `UInt16`, `UInt32`, `UInt64`.
 
 Possible values:
 
-- true — Check column or sampling expression is correct at table creation.
-- false — Do not check column or sampling expression is correct at table creation.
+- true — The check is enabled.
+- false — The check is disabled at table creation.
 
 Default value: `true`.
 
-By default, the ClickHouse server check column for sampling or sampling expression at table creation. If you already had tables with incorrect sampling expression, set value `false` to make ClickHouse server do not raise exception when ClickHouse server is starting.
-[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->
+By default, the ClickHouse server checks the data type of the sampling column or sampling expression at table creation. If you already have tables with an incorrect sampling expression and do not want the server to raise an exception during startup, set `check_sample_column_is_correct` to `false`.
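A sketch of what the check validates; with the setting at its default `true`, the table below is accepted because the sampling expression yields an unsigned integer (the table name is illustrative):

```sql
-- intHash32(user_id) returns UInt32, so the check passes;
-- the sampling expression must also appear in the sorting key.
CREATE TABLE hits_sampled
(
    user_id UInt64,
    url String
)
ENGINE = MergeTree
ORDER BY (user_id, intHash32(user_id))
SAMPLE BY intHash32(user_id);
```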
@@ -28,7 +28,7 @@ Structure of the `users` section:
         <profile>profile_name</profile>
 
         <quota>default</quota>
+        <default_database>default</default_database>
         <databases>
             <database_name>
                 <table_name>
@@ -20,6 +20,29 @@
 - `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.`
 - `allow` — Allows the use of these types of subqueries.
 
+## prefer_global_in_and_join {#prefer-global-in-and-join}
+
+Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`.
+
+Possible values:
+
+- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`.
+- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`.
+
+Default value: `0`.
+
+**Usage**
+
+Although `SET distributed_product_mode=global` can change the behavior of queries over distributed tables, it is not suitable for local tables or tables from external resources. This is where the `prefer_global_in_and_join` setting comes into play.
+
+For example, we have query serving nodes that contain local tables, which are not suitable for distribution. We need to scatter their data on the fly during distributed processing with the `GLOBAL` keyword: `GLOBAL IN`/`GLOBAL JOIN`.
+
+Another use case of `prefer_global_in_and_join` is accessing tables created by external engines. This setting helps to reduce the number of calls to external sources while joining such tables: only one call per query.
+
+**See also:**
+
+- [Distributed subqueries](../../sql-reference/operators/in.md#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN`
+
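A hedged sketch of the setting in action; the table names are hypothetical:

```sql
SET prefer_global_in_and_join = 1;

-- With the setting enabled, the IN below behaves as GLOBAL IN:
-- the subquery runs once on the initiator and its result is sent
-- to the remote servers instead of re-running on each of them.
SELECT count()
FROM distributed_hits
WHERE user_id IN (SELECT user_id FROM local_users);
```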
 ## enable_optimize_predicate_expression {#enable-optimize-predicate-expression}
 
 Turns on predicate pushdown in `SELECT` queries.
 
@@ -2927,7 +2950,7 @@ Result:
 └─────────────┘
 ```
 
-Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
+Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour.
 
 ## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
 
@@ -62,4 +62,3 @@ exception_code: ZOK
 ```
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue.md) <!--hide-->
@ -51,6 +51,7 @@ Columns:
|
||||
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
|
||||
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
|
||||
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
|
||||
- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution.
|
||||
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
|
||||
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
|
||||
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
|
||||
@ -65,6 +66,8 @@ Columns:
|
||||
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
|
||||
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
|
||||
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
|
||||
- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution).
|
||||
- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution).
|
||||
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
|
||||
- 1 — TCP.
|
||||
- 2 — HTTP.
|
||||
@ -101,55 +104,77 @@ Columns:
|
||||
**Example**

``` sql
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```

``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-07-28
event_time: 2021-07-28 13:46:56
event_time_microseconds: 2021-07-28 13:46:56.719791
query_start_time: 2021-07-28 13:46:56
query_start_time_microseconds: 2021-07-28 13:46:56.704542
query_duration_ms: 14
read_rows: 8393
read_bytes: 374325
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
result_rows: 4201
result_bytes: 153024
memory_usage: 4714038
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res)
normalized_query_hash: 6666026786019643712
query_kind: Select
databases: ['system']
tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables']
columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name']
projections: []
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
address: ::ffff:127.0.0.1
port: 33452
port: 51006
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 51006
initial_query_start_time: 2021-07-28 13:46:56
initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
os_user:
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 8
client_version_patch: 0
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
revision: 54453
log_comment:
thread_ids: [5058,22097,22110,22094]
ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars']
ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: []
used_aggregate_function_combinators: []
used_database_engines: []
used_data_type_families: ['UInt64','UInt8','Nullable','String','date']
used_dictionaries: []
used_formats: []
used_functions: ['concat','notEmpty','extractAll']
used_storages: []
used_table_functions: []
```

**See Also**
@ -275,9 +275,13 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell

When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.

For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used, and it is re-requested the next time it needs to be used.
If keys are not found in the dictionary, then an update-cache task is created and added to the update queue. Update queue properties can be controlled with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.

For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and the key becomes expired. The key is re-requested the next time it needs to be used; this behaviour can be configured with the setting `allow_read_expired_keys`.
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table.

If the setting `allow_read_expired_keys` is set to 1 (0 by default), the dictionary can support asynchronous updates. If a client requests keys and all of them are in the cache, but some of them are expired, then the dictionary returns the expired keys to the client and requests them asynchronously from the source.

To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
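A minimal sketch of this pattern, assuming a hypothetical cache dictionary `my_cache_dict` with an attribute `attr` and a source table `visits` (all names are illustrative):

``` sql
-- Restrict the rows with LIMIT first, then call the dictionary function
-- on the small result set instead of on every row of the large table.
SELECT
    id,
    dictGet('my_cache_dict', 'attr', toUInt64(id)) AS attr
FROM
(
    SELECT id
    FROM visits
    ORDER BY event_time DESC
    LIMIT 100
);
```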
Supported [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): MySQL, ClickHouse, executable, HTTP.
@ -289,6 +293,16 @@ Example of settings:

    <cache>
        <!-- The size of the cache, in number of cells. Rounded up to a power of two. -->
        <size_in_cells>1000000000</size_in_cells>
        <!-- Allows to read expired keys. -->
        <allow_read_expired_keys>0</allow_read_expired_keys>
        <!-- Max size of update queue. -->
        <max_update_queue_size>100000</max_update_queue_size>
        <!-- Max timeout in milliseconds for push update task into queue. -->
        <update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
        <!-- Max wait timeout in milliseconds for update task to complete. -->
        <query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
        <!-- Max threads for cache dictionary update. -->
        <max_threads_for_updates>4</max_threads_for_updates>
    </cache>
</layout>
```
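For reference, a sketch of the same layout expressed through a DDL-created dictionary; the structure, source table, and lifetime here are illustrative assumptions:

``` sql
CREATE DICTIONARY my_cache_dict
(
    id UInt64,
    attr String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'attrs_table'))  -- hypothetical source table
LIFETIME(MIN 300 MAX 360)
LAYOUT(CACHE(
    SIZE_IN_CELLS 1000000000
    ALLOW_READ_EXPIRED_KEYS 0
    MAX_UPDATE_QUEUE_SIZE 100000
    UPDATE_QUEUE_PUSH_TIMEOUT_MILLISECONDS 10
    QUERY_WAIT_TIMEOUT_MILLISECONDS 60000
    MAX_THREADS_FOR_UPDATES 4
));
```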
@ -315,7 +329,7 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic

### ssd_cache {#ssd-cache}

Similar to `cache`, but stores data on SSD and index in RAM.
Similar to `cache`, but stores data on SSD and the index in RAM. All cache dictionary settings related to the update queue can also be applied to SSD cache dictionaries.

``` xml
<layout>

@ -85,7 +85,7 @@ hex(arg)

The function uses uppercase letters `A-F` and does not use any prefixes (like `0x`) or suffixes (like `h`).

For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big endian or “human readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if leading digit is zero.
For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero.

**Example**

@ -105,7 +105,7 @@ Values of type `Date` and `DateTime` are formatted as corresponding integers (th

For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted.

Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted.
Values of floating point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.

**Arguments**

@ -206,6 +206,141 @@ Result:

└──────┘
```
## bin {#bin}

Returns a string containing the argument’s binary representation.

Alias: `BIN`.

**Syntax**

``` sql
bin(arg)
```

For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints all eight digits of every byte even if the leading digit is zero.

**Example**

Query:

``` sql
SELECT bin(1);
```

Result:

``` text
00000001
```

Values of type `Date` and `DateTime` are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime).

For `String` and `FixedString`, all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted.

Values of floating-point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.

**Arguments**

- `arg` — A value to convert to binary. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).

**Returned value**

- A string with the binary representation of the argument.

Type: `String`.

**Example**

Query:

``` sql
SELECT bin(toFloat32(number)) as bin_presentation FROM numbers(15, 2);
```

Result:

``` text
┌─bin_presentation─────────────────┐
│ 00000000000000000111000001000001 │
│ 00000000000000001000000001000001 │
└──────────────────────────────────┘
```

Query:

``` sql
SELECT bin(toFloat64(number)) as bin_presentation FROM numbers(15, 2);
```

Result:

``` text
┌─bin_presentation─────────────────────────────────────────────────┐
│ 0000000000000000000000000000000000000000000000000010111001000000 │
│ 0000000000000000000000000000000000000000000000000011000001000000 │
└──────────────────────────────────────────────────────────────────┘
```
## unbin {#unbinstr}

Performs the opposite operation of [bin](#bin). It interprets each group of eight binary digits (in the argument) as a number and converts it to the byte represented by that number. The return value is a binary string (BLOB).

If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions.

!!! note "Note"
    If `unbin` is invoked from within the `clickhouse-client`, binary strings display using UTF-8.

Alias: `UNBIN`.

**Syntax**

``` sql
unbin(arg)
```

**Arguments**

- `arg` — A string containing any number of binary digits. Type: [String](../../sql-reference/data-types/string.md).

Supports binary digits `0-1`. The number of binary digits does not have to be a multiple of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of bin(N) is not performed by unbin().

**Returned value**

- A binary string (BLOB).

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

``` sql
SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100');
```

Result:

``` text
┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐
│ 012                               │ MySQL                                             │
└───────────────────────────────────┴───────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT reinterpretAsUInt64(reverse(unbin('1010'))) AS num;
```

Result:

``` text
┌─num─┐
│  10 │
└─────┘
```

## UUIDStringToNum(str) {#uuidstringtonumstr}

Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16).

132
docs/en/sql-reference/functions/nlp-functions.md
Normal file
@ -0,0 +1,132 @@

---
toc_priority: 67
toc_title: NLP
---

# [experimental] Natural Language Processing functions {#nlp-functions}

!!! warning "Warning"
    This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.

## stem {#stem}

Performs stemming on a given word.

**Syntax**

``` sql
stem('language', word)
```

**Arguments**

- `language` — Language whose rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```

Result:

``` text
┌─res────────────────────────────────────────────────┐
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```

## lemmatize {#lemmatize}

Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).

**Syntax**

``` sql
lemmatize('language', word)
```

**Arguments**

- `language` — Language whose rules will be applied. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT lemmatize('en', 'wolves');
```

Result:

``` text
┌─lemmatize("wolves")─┐
│ "wolf"              │
└─────────────────────┘
```

Configuration:
``` xml
<lemmatizers>
    <lemmatizer>
        <lang>en</lang>
        <path>en.bin</path>
    </lemmatizer>
</lemmatizers>
```

## synonyms {#synonyms}

Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`.

With the `plain` extension type we need to provide a path to a simple text file, where each line corresponds to a certain synonym set. Words in this line must be separated with space or tab characters.

With the `wordnet` extension type we need to provide a path to a directory with a WordNet thesaurus in it. The thesaurus must contain a WordNet sense index.

**Syntax**

``` sql
synonyms('extension_name', word)
```

**Arguments**

- `extension_name` — Name of the extension in which the search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that will be searched in the extension. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT synonyms('list', 'important');
```

Result:

``` text
┌─synonyms('list', 'important')────────────┐
│ ['important','big','critical','crucial'] │
└──────────────────────────────────────────┘
```

Configuration:
``` xml
<synonyms_extensions>
    <extension>
        <name>en</name>
        <type>plain</type>
        <path>en.txt</path>
    </extension>
    <extension>
        <name>en</name>
        <type>wordnet</type>
        <path>en/</path>
    </extension>
</synonyms_extensions>
```
@ -145,6 +145,72 @@ Result:

└────────────────────────────┘
```

## splitByWhitespace(s) {#splitbywhitespaceseparator-s}

Splits a string into substrings separated by whitespace characters.
Returns an array of selected substrings.

**Syntax**

``` sql
splitByWhitespace(s)
```

**Arguments**

- `s` — The string to split. [String](../../sql-reference/data-types/string.md).

**Returned value(s)**

Returns an array of selected substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

**Example**

``` sql
SELECT splitByWhitespace('  1!  a,  b.  ');
```

``` text
┌─splitByWhitespace('  1!  a,  b.  ')─┐
│ ['1!','a,','b.']                    │
└─────────────────────────────────────┘
```

## splitByNonAlpha(s) {#splitbynonalphaseparator-s}

Splits a string into substrings separated by whitespace and punctuation characters.
Returns an array of selected substrings.

**Syntax**

``` sql
splitByNonAlpha(s)
```

**Arguments**

- `s` — The string to split. [String](../../sql-reference/data-types/string.md).

**Returned value(s)**

Returns an array of selected substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

**Example**

``` sql
SELECT splitByNonAlpha('  1!  a,  b.  ');
```

``` text
┌─splitByNonAlpha('  1!  a,  b.  ')─┐
│ ['1','a','b']                     │
└───────────────────────────────────┘
```

## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}

Concatenates the strings listed in the array with the separator. ’separator’ is an optional parameter: a constant string, set to an empty string by default.
@ -13,13 +13,14 @@ toc_title: Strings

Returns 1 for an empty string or 0 for a non-empty string.
The result type is UInt8.
A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte.
The function also works for arrays.
The function also works for arrays or UUID.
A UUID is empty if it is all zeros (the nil UUID).
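A quick sketch of the UUID case (illustrative only; per the rule above, only the all-zero UUID counts as empty):

``` sql
SELECT
    empty(toUUID('00000000-0000-0000-0000-000000000000')) AS nil_uuid,  -- 1
    empty(generateUUIDv4()) AS random_uuid;                             -- 0
```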
## notEmpty {#notempty}

Returns 0 for an empty string or 1 for a non-empty string.
The result type is UInt8.
The function also works for arrays.
The function also works for arrays or UUID.

## length {#length}
@ -20,12 +20,11 @@ The following actions are supported:

- [ADD COLUMN](#alter_add-column) — Adds a new column to the table.
- [DROP COLUMN](#alter_drop-column) — Deletes the column.
- [RENAME COLUMN](#alter_rename-column) — Renames the column.
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
- [RENAME COLUMN](#alter_rename-column) — Renames an existing column.

These actions are described in detail below.

@ -35,7 +34,7 @@ These actions are described in detail below.

ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```

Adds a new column to the table with the specified `name`, `type`, [`codec`](../../../sql-reference/statements/create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)).
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)).

If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
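A hypothetical example combining these clauses (the table and column names are illustrative, not from this page):

``` sql
-- Add a column after an existing one, only if it is not already there.
ALTER TABLE visits ADD COLUMN IF NOT EXISTS browser String DEFAULT '' AFTER user_id;

-- Add a column at the very beginning of the table.
ALTER TABLE visits ADD COLUMN first_seen Date FIRST;
```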
@ -64,6 +63,7 @@ Added2 UInt32

ToDrop UInt32
Added3 UInt32
```

## DROP COLUMN {#alter_drop-column}

``` sql
@ -118,7 +118,7 @@ ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple()

## COMMENT COLUMN {#alter_comment-column}

``` sql
COMMENT COLUMN [IF EXISTS] name 'comment'
COMMENT COLUMN [IF EXISTS] name 'Text comment'
```

Adds a comment to the column. If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.

@ -136,7 +136,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for

## MODIFY COLUMN {#alter_modify-column}

``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```

This query changes the `name` column properties:

@ -145,8 +145,12 @@ This query changes the `name` column properties:

- Default expression

- Compression Codec

- TTL

For examples of modifying column compression codecs, see [Column Compression Codecs](../create/table.md#codecs).

For examples of modifying column TTL, see [Column TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).

If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.
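A hypothetical sketch changing several of these properties in one statement (the table, column, and TTL expression are illustrative assumptions):

``` sql
ALTER TABLE visits
    MODIFY COLUMN browser LowCardinality(String) DEFAULT '' CODEC(ZSTD) TTL event_date + INTERVAL 1 MONTH;
```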
@ -179,6 +183,8 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;

**Example**

Remove TTL:

```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
@ -187,22 +193,6 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;

- [REMOVE TTL](ttl.md).

## RENAME COLUMN {#alter_rename-column}

Renames an existing column.

Syntax:

```sql
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name
```

**Example**

```sql
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
```

## Limitations {#alter-query-limitations}

The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
@ -213,4 +203,4 @@ If the `ALTER` query is not sufficient to make the table changes you need, you c

The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.

For tables that do not store data themselves (such as `Merge` and `Distributed`), `ALTER` just changes the table structure, and does not change the structure of subordinate tables. For example, when running ALTER for a `Distributed` table, you will also need to run `ALTER` for the tables on all remote servers.
For tables that do not store data themselves (such as [Merge](../../../sql-reference/statements/alter/index.md) and [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` just changes the table structure, and does not change the structure of subordinate tables. For example, when running ALTER for a `Distributed` table, you will also need to run `ALTER` for the tables on all remote servers.
60
docs/en/sql-reference/statements/alter/setting.md
Normal file
@ -0,0 +1,60 @@

---
toc_priority: 38
toc_title: SETTING
---

# Table Settings Manipulations {#table_settings_manipulations}

There is a set of queries to change table settings. You can modify settings or reset them to default values. A single query can change several settings at once.
If a setting with the specified name does not exist, then the query raises an exception.

**Syntax**

``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```

!!! note "Note"
    These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only.

## MODIFY SETTING {#alter_modify_setting}

Changes table settings.

**Syntax**

```sql
MODIFY SETTING setting_name=value [, ...]
```

**Example**

```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id;

ALTER TABLE example_table MODIFY SETTING max_part_loading_threads=8, max_parts_in_total=50000;
```

## RESET SETTING {#alter_reset_setting}

Resets table settings to their default values. If a setting is in a default state, then no action is taken.

**Syntax**

```sql
RESET SETTING setting_name [, ...]
```

**Example**

```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id
SETTINGS max_part_loading_threads=8;

ALTER TABLE example_table RESET SETTING max_part_loading_threads;
```

**See Also**

- [MergeTree settings](../../../operations/settings/merge-tree-settings.md)
@ -254,6 +254,20 @@ CREATE TABLE codec_example

ENGINE = MergeTree()
```

### Encryption Codecs {#create-query-encryption-codecs}

These codecs don't actually compress data, but instead encrypt data on disk. These are only available when an encryption key is specified by [encryption](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption) settings. Note that encryption only makes sense at the end of codec pipelines, because encrypted data usually can't be compressed in any meaningful way.

Encryption codecs:

- `Encrypted('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode. This codec uses a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence).

!!! attention "Attention"
    Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed.

!!! attention "Attention"
    If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging.
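A minimal sketch of an encrypted column, assuming an AES-128 key has already been configured in the server's `encryption` settings (the table and column names are illustrative):

``` sql
CREATE TABLE encrypted_example
(
    id UInt64,
    -- Compression first, encryption last: encrypted bytes do not compress.
    secret String CODEC(LZ4, Encrypted('AES-128-GCM-SIV'))
)
ENGINE = MergeTree()
ORDER BY id;
```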
## Temporary Tables {#temporary-tables}

ClickHouse supports temporary tables which have the following characteristics:

@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]

    [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}]
    [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
    [DEFAULT ROLE role [,...]]
    [DEFAULT DATABASE database | NONE]
    [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
```
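A hypothetical example of the `DEFAULT DATABASE` clause added above (the user and database names are illustrative):

``` sql
CREATE USER mira IDENTIFIED WITH sha256_password BY 'qwerty' DEFAULT DATABASE analytics;
```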
@ -274,28 +274,28 @@ This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../

`WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missing values of the `expr` column will be filled sequentially and other columns will be filled as defaults.

Use following syntax for filling multiple columns add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
To fill multiple columns, add the `WITH FILL` modifier with optional parameters after each field name in the `ORDER BY` section.

``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```

`WITH FILL` can be applied only for fields with Numeric (all kind of float, decimal, int) or Date/DateTime types.
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied to `String` fields, missing values are filled with empty strings.
When `FROM const_expr` is not defined, the filling sequence uses the minimal `expr` field value from `ORDER BY`.
When `TO const_expr` is not defined, the filling sequence uses the maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted as is for numeric types, as days for the Date type and as seconds for the DateTime type.
When `STEP const_numeric_expr` is omitted, the filling sequence uses `1.0` for numeric types, `1 day` for the Date type and `1 second` for the DateTime type.

For example, the following query
Example of a query without `WITH FILL`:

``` sql
SELECT n, source FROM (
    SELECT toFloat32(number % 10) AS n, 'original' AS source
    FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```

returns
Result:

``` text
┌─n─┬─source───┐
@ -305,16 +305,16 @@ returns

└───┴──────────┘
```

but after apply `WITH FILL` modifier
Same query after applying the `WITH FILL` modifier:

``` sql
SELECT n, source FROM (
    SELECT toFloat32(number % 10) AS n, 'original' AS source
    FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5;
```

returns
Result:

``` text
┌───n─┬─source───┐
@ -334,7 +334,7 @@ returns

└─────┴──────────┘
```

For the case when we have multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in `ORDER BY` clause.
For the case with multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL`, the order of filling follows the order of fields in the `ORDER BY` clause.

Example:

@ -350,7 +350,7 @@ ORDER BY

    d1 WITH FILL STEP 5;
```

returns
Result:

``` text
┌───d1───────┬───d2───────┬─source───┐
@ -364,9 +364,9 @@ returns

└────────────┴────────────┴──────────┘
```

Field `d1` does not fill and use default value cause we do not have repeated values for `d2` value, and sequence for `d1` can’t be properly calculated.
Field `d1` is not filled in and uses the default value, because we do not have repeated values for `d2`, and the sequence for `d1` can’t be properly calculated.

The following query with a changed field in `ORDER BY`
The following query with the changed field in `ORDER BY`:

``` sql
SELECT
@ -380,7 +380,7 @@ ORDER BY

    d2 WITH FILL;
```

returns
Result:

``` text
┌───d1───────┬───d2───────┬─source───┐
@ -45,7 +45,7 @@ toc_title: "\u7D71\u5408"

- Monitoring
    - [Graphite](https://graphiteapp.org)
    - [Graphouse](https://github.com/yandex/graphouse)
    - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
    - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
    - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
    - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from the [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied
    - [Grafana](https://grafana.com/)
@ -38,3 +38,15 @@ toc_title: "Third-party libraries used"

| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |

## Guidelines for adding third-party libraries and maintaining custom changes in them {#adding-third-party-libraries}

1. All external third-party code must reside in dedicated directories under the `contrib` directory of the ClickHouse repository. Use Git submodules where possible.
2. Clone the official repository in [Clickhouse-extras](https://github.com/ClickHouse-Extras). Use official repositories on GitHub where available.
3. Create a new branch based on the branch you want to integrate: e.g., `master` -> `clickhouse/master` or `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`.
4. All copies in [Clickhouse-extras](https://github.com/ClickHouse-Extras) can be synchronized with the remote repositories automatically. The `clickhouse/...` branches will remain unaffected, since most likely nobody will use this naming scheme in their repositories.
5. Add submodules under `contrib` of the ClickHouse repository that refer to the cloned repositories. Configure the submodules to track changes in the corresponding `clickhouse/...` branches.
6. Every time changes have to be made in the library code, a dedicated branch should be created, e.g., `clickhouse/my-fix`. This branch should then be merged into the branch tracked by the submodule, e.g., `clickhouse/master` or `clickhouse/release/vX.Y.Z`.
7. Do not add code to clones of the [Clickhouse-extras](https://github.com/ClickHouse-Extras) repositories if the branch name does not follow the `clickhouse/...` pattern.
8. Always make changes with the official repository in mind. After a PR has been merged from a (development/fix) branch of your personal clone into [Clickhouse-extras](https://github.com/ClickHouse-Extras) and the submodule has been added to the ClickHouse repository, it is recommended to make another PR from the (development/fix) branch of [Clickhouse-extras](https://github.com/ClickHouse-Extras) to the official repository of the library. This solves the following tasks: 1) the published code can be reused and gains more value; 2) other users can also use it for their purposes; 3) the code will be maintained not only by ClickHouse developers.
9. For a submodule to start using new code from the original branch (e.g., `master`), first carefully merge it (`master` -> `clickhouse/master`), and only after that can the changes be pulled into the main ClickHouse repository. This is because changes may have been made to the tracked branch (e.g., `clickhouse/master`), so it may diverge from its upstream (`master`).
@ -92,7 +92,7 @@ ClickHouse does not run or build on 32-bit platforms

    # The last two commands can be combined:
    git submodule update --init

The next commands would help you to reset all submodules to the initial state (!WARING! - any chenges inside will be deleted):
The following commands help you reset all submodules to their initial state (!WARNING! - all changes in the submodules will be lost):

    # Synchronizes submodules' remote URL with .gitmodules
@ -242,6 +242,8 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"

Code style: https://clickhouse.tech/docs/ru/development/style/

Guidelines for adding third-party libraries and maintaining custom changes in them: https://clickhouse.tech/docs/ru/development/contrib/#adding-third-party-libraries

Writing tests: https://clickhouse.tech/docs/ru/development/tests/

Task list: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22
@ -820,11 +820,11 @@ The dictionary is configured incorrectly.

**10.** Unnecessary code is removed from the sources.

## Libraries {#biblioteki}
## Libraries {#libraries}

**1.** The C++20 standard library is used (experimental extensions are allowed), as well as the `boost` and `Poco` frameworks.
**1.** The C++20 standard libraries are used (experimental extensions are allowed), as well as the `boost` and `Poco` frameworks.

**2.** Libraries must be located as source code in the `contrib` directory and built together with ClickHouse. Using libraries available from OS packages, or any other way of installing libraries into the system, is not allowed.
**2.** Libraries must be located as source code in the `contrib` directory and built together with ClickHouse. Using libraries available from OS packages, or any other way of installing libraries into the system, is not allowed. For details, see the section [Guidelines for adding third-party libraries and maintaining custom changes in them](contrib.md#adding-third-party-libraries).

**3.** Preference is given to libraries that are already in use.

@ -902,4 +902,3 @@ function(

    const & RangesInDataParts ranges,
    size_t limit)
```
@ -14,7 +14,7 @@ toc_title: "Introduction"

- [MySQL](../../engines/database-engines/mysql.md)

- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)

- [Lazy](../../engines/database-engines/lazy.md)
@ -1,22 +1,22 @@

---
toc_priority: 29
toc_title: MaterializeMySQL
toc_title: MaterializedMySQL
---

# MaterializeMySQL {#materialize-mysql}
# MaterializedMySQL {#materialized-mysql}

Creates a ClickHouse database with all the tables existing in MySQL, and all the data in those tables.

The ClickHouse server works as a MySQL replica. It reads the binlog and performs DDL and DML queries.

`MaterializeMySQL` is an experimental database engine.
`MaterializedMySQL` is an experimental database engine.

## Creating a Database {#creating-a-database}

``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```

**Engine Parameters**

@ -28,7 +28,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor

## Virtual Columns {#virtual-columns}

When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with the virtual columns `_sign` and `_version`.
When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with the virtual columns `_sign` and `_version`.

- `_version` — transaction counter. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — deletion mark. Type: [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
@ -75,9 +75,9 @@ DDL queries in MySQL are converted into the corresponding queries

- An `UPDATE` query is converted in ClickHouse into an `INSERT` with `_sign=-1` and an `INSERT` with `_sign=1`.

### Selecting from MaterializeMySQL tables {#select}
### Selecting from MaterializedMySQL tables {#select}

A `SELECT` query from `MaterializeMySQL` tables has some specifics:
A `SELECT` query from `MaterializedMySQL` tables has some specifics:

- If the `_version` column is not specified explicitly in the `SELECT` query, the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used. Thus, only rows with `MAX(_version)` are selected.

@ -94,10 +94,10 @@ DDL queries in MySQL are converted into the corresponding queries

**Note**

- Rows with `_sign=-1` are not physically deleted from the tables.
- Cascading `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Cascading `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine.
- Replication can easily be broken.
- Direct data modification operations on `MaterializeMySQL` tables and databases are prohibited.
- `MaterializeMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. When a table on the MySQL server changes, the data is merged into the corresponding table in the `MaterializeMySQL` database.
- Direct data modification operations on `MaterializedMySQL` tables and databases are prohibited.
- `MaterializedMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. When a table on the MySQL server changes, the data is merged into the corresponding table in the `MaterializedMySQL` database.

## Examples of Use {#examples-of-use}

@ -126,7 +126,7 @@ mysql> SELECT * FROM test;

The database and the created table:

``` sql
CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***');
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
@ -14,6 +14,8 @@ toc_priority: 29

- [Log](log.md)
- [TinyLog](tinylog.md)

Table engines of the `Log` family can store data in the distributed file systems [HDFS](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-hdfs) or [S3](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3).

## Common Properties {#obshchie-svoistva}

Engines:
@ -5,9 +5,8 @@ toc_title: Log

# Log {#log}

The engine belongs to the Log family of engines. See the common properties of the engines and their differences in the [Log family](index.md) article.

It differs from [TinyLog](tinylog.md) in that a small file of "marks" is stored alongside the column files. The marks are written for every data block and contain offsets: where to start reading the file to skip the specified number of rows. This makes it possible to read table data in multiple threads.
With concurrent data access, reads can be performed simultaneously, while writes block reads and each other.
The Log engine does not support indexes. Likewise, if writing to a table fails, the table becomes broken, and reading from it returns an error. The Log engine is suitable for temporary data, write-once tables, and for testing or demonstration purposes.
The engine belongs to the `Log` family of engines. See the common properties of the engines and their differences in the [Log family](../../../engines/table-engines/log-family/index.md) article.

It differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of "marks" is stored alongside the column files. The marks are written for every data block and contain offsets: where to start reading the file to skip the specified number of rows. This makes it possible to read table data in multiple threads.
With concurrent data access, reads can be performed simultaneously, while writes block reads and each other.
The `Log` engine does not support indexes. Likewise, if writing to a table fails, the table becomes broken, and reading from it returns an error. The `Log` engine is suitable for temporary data, write-once tables, and for testing or demonstration purposes.
@ -771,7 +771,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

- `cache_path` — the path on the local file system where the marks cache and index files will be stored. Default value: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — whether to run an access check when the disk starts. If set to `true`, the check is not performed. Default value: `false`.

An S3 disk can be configured as `main` or `cold`:

``` xml
@ -810,3 +809,44 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

```

If a disk is configured as `cold`, data is moved to S3 when TTL rules fire or when the free space on the local disk falls below the threshold defined as `move_factor * disk_size`.

## Using HDFS for Data Storage {#table_engine-mergetree-hdfs}

[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) is a distributed file system for remote data storage.

Tables of the `MergeTree` family can store data in HDFS by using a disk of type `HDFS`.

Configuration example:
``` xml
<yandex>
    <storage_configuration>
        <disks>
            <hdfs>
                <type>hdfs</type>
                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
            </hdfs>
        </disks>
        <policies>
            <hdfs>
                <volumes>
                    <main>
                        <disk>hdfs</disk>
                    </main>
                </volumes>
            </hdfs>
        </policies>
    </storage_configuration>

    <merge_tree>
        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
    </merge_tree>
</yandex>
```

Required parameters:

- `endpoint` — the URL of the HDFS request endpoint in `path` format. The endpoint URL must contain the path to the root directory on the server where the data is stored.

Optional parameters:

- `min_bytes_for_seek` — the minimum number of bytes to use a seek operation instead of sequential reads. Default value: 1 MB.
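A minimal sketch of a table placed on HDFS through the `hdfs` policy defined above (the table and its columns are illustrative):

``` sql
CREATE TABLE hdfs_table
(
    id UInt64,
    data String
)
ENGINE = MergeTree()
ORDER BY id
SETTINGS storage_policy = 'hdfs';
```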
@ -43,7 +43,7 @@ toc_title: "Third-party integration libraries"

- Monitoring
    - [Graphite](https://graphiteapp.org)
    - [graphouse](https://github.com/yandex/graphouse)
    - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
    - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
    - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
    - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) — optimizes partitions of [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) tables according to the rules in the [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration)
    - [Grafana](https://grafana.com/)
@ -277,4 +277,15 @@ If the total number of active parts in all

Default value: `0`.

[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->
## check_sample_column_is_correct {#check_sample_column_is_correct}

Enables the check, at table creation, that the data type of the column used for sampling or of the sampling expression is correct. The data type must be one of the unsigned [integer types](../../sql-reference/data-types/int-uint.md): `UInt8`, `UInt16`, `UInt32`, `UInt64`.

Possible values:

- true — the check is enabled.
- false — the check is not performed at table creation.

Default value: `true`.

By default, the ClickHouse server checks the data type of the column used for sampling or of the sampling expression when creating a table. If tables with an incorrect sampling expression already exist, set `check_sample_column_is_correct` to `false` so that the server does not raise an exception at startup.
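A minimal sketch of disabling the check for a single table (the table definition is illustrative; `id` has a signed type, which the check would normally reject for sampling):

``` sql
CREATE TABLE sample_example
(
    id Int32,
    val String
)
ENGINE = MergeTree()
ORDER BY id
SAMPLE BY id
SETTINGS check_sample_column_is_correct = 0;
```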
@ -25,6 +25,30 @@ ClickHouse applies the setting in cases when

- `global` — replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN`.
- `allow` — allows the use of such subqueries.

## prefer_global_in_and_join {#prefer-global-in-and-join}

Replaces `IN`/`JOIN` queries with `GLOBAL IN`/`GLOBAL JOIN`.

Possible values:

- 0 — disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`.
- 1 — enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`.

Default value: `0`.

**Usage**

The `SET distributed_product_mode=global` setting changes the behavior of queries over distributed tables, but it is not suitable for local tables or tables from external sources. In these cases, the `prefer_global_in_and_join` setting comes in handy.

For example, if you need to join data from local tables located on different nodes, `GLOBAL JOIN` is required for distributed processing.

Another use case for `prefer_global_in_and_join` is regulating access to tables from external sources.
This setting helps reduce the number of calls to external resources when joining external tables: only one call for the whole distributed query.
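A brief sketch of the setting in action, assuming two hypothetical tables `t_local_a` and `t_local_b` that exist locally on every node:

``` sql
SET prefer_global_in_and_join = 1;

-- With the setting enabled, this JOIN runs as a GLOBAL JOIN:
-- the right-hand side is collected once and sent to all nodes.
SELECT a.id, b.value
FROM t_local_a AS a
JOIN t_local_b AS b ON a.id = b.id;
```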
**See also:**

- [Distributed subqueries](../../sql-reference/operators/in.md#select-distributed-subqueries) `GLOBAL IN`/`GLOBAL JOIN`

## enable_optimize_predicate_expression {#enable-optimize-predicate-expression}

Enables predicate pushdown into subqueries for `SELECT` queries.
@ -2777,7 +2801,7 @@ SELECT * FROM test2;

└─────────────┘
```

Note that this setting affects the behavior of [materialized views](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) databases.
Note that this setting affects the behavior of [materialized views](../../sql-reference/statements/create/view.md#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) databases.

## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
@ -35,4 +35,3 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10

- [system.events](#system_tables-events) — a table with counters of events that have occurred.
- [system.metric_log](#system_tables-metric_log) — a table that records the history of metric values from `system.metrics` and `system.events`.

@ -61,4 +61,3 @@ exception_code: ZOK

2 rows in set. Elapsed: 0.025 sec.
```
@ -51,6 +51,7 @@ ClickHouse does not delete data from the table automatically

- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — names of the tables present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — names of the columns present in the query.
- `projections` ([String](../../sql-reference/data-types/string.md)) — names of the projections used during query execution.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — exception code.
- `exception` ([String](../../sql-reference/data-types/string.md)) — exception message if the query failed with an exception.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query completed successfully.
@ -65,6 +66,8 @@ ClickHouse does not delete data from the table automatically
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the parent query.
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — the client port used to make the parent query.
- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — parent query start time (for distributed query execution).
- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — parent query start time with microsecond precision (for distributed query execution).
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — interface the query was initiated from. Possible values:
    - 1 — TCP.
    - 2 — HTTP.
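A short illustrative query over the columns described above; it assumes the server has already logged some finished queries:

``` sql
SELECT query_id, query_kind, databases, tables, exception_code
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY query_start_time DESC
LIMIT 5;
```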
@ -101,55 +104,77 @@ ClickHouse does not delete data from the table automatically

**Example**

``` sql
SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical;
```

``` text
Row 1:
──────
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
type: QueryFinish
event_date: 2021-07-28
event_time: 2021-07-28 13:46:56
event_time_microseconds: 2021-07-28 13:46:56.719791
query_start_time: 2021-07-28 13:46:56
query_start_time_microseconds: 2021-07-28 13:46:56.704542
query_duration_ms: 14
read_rows: 8393
read_bytes: 374325
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
result_rows: 4201
result_bytes: 153024
memory_usage: 4714038
current_database: default
query: INSERT INTO test1 VALUES
query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res)
normalized_query_hash: 6666026786019643712
query_kind: Select
databases: ['system']
tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables']
columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name']
projections: []
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
address: ::ffff:127.0.0.1
port: 33452
port: 51006
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf
initial_address: ::ffff:127.0.0.1
initial_port: 33452
initial_port: 51006
initial_query_start_time: 2021-07-28 13:46:56
initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
os_user:
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 8
client_version_patch: 0
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
revision: 54453
log_comment:
thread_ids: [5058,22097,22110,22094]
ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars']
ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: []
used_aggregate_function_combinators: []
used_database_engines: []
used_data_type_families: ['UInt64','UInt8','Nullable','String','date']
used_dictionaries: []
used_formats: []
used_functions: ['concat','notEmpty','extractAll']
used_storages: []
used_table_functions: []
```

**See also**
132
docs/ru/sql-reference/functions/nlp-functions.md
Normal file
132
docs/ru/sql-reference/functions/nlp-functions.md
Normal file
@ -0,0 +1,132 @@

---
toc_priority: 67
toc_title: NLP
---

# [experimental] Natural Language Processing Functions {#nlp-functions}

!!! warning "Warning"
    Natural language processing functions are currently an experimental feature. To use them, enable the setting `allow_experimental_nlp_functions = 1`.
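A minimal sketch of enabling these functions for a session before calling any of them:

``` sql
SET allow_experimental_nlp_functions = 1;

SELECT stem('en', 'blessing');  -- returns 'bless', per the example below
```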
## stem {#stem}

Performs stemming on a given word.

**Syntax**

``` sql
stem('language', word)
```

**Arguments**

- `language` — Language whose rules will be applied for stemming. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```

Result:

``` text
┌─res────────────────────────────────────────────────┐
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```
## lemmatize {#lemmatize}

Performs lemmatization on a given word. The lemmatizer needs dictionaries to work, which can be found [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).

**Syntax**

``` sql
lemmatize('language', word)
```

**Arguments**

- `language` — Language whose rules will be applied for lemmatization. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word to be lemmatized. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT lemmatize('en', 'wolves');
```

Result:

``` text
┌─lemmatize("wolves")─┐
│ "wolf"              │
└─────────────────────┘
```

Configuration:

``` xml
<lemmatizers>
    <lemmatizer>
        <lang>en</lang>
        <path>en.bin</path>
    </lemmatizer>
</lemmatizers>
```
## synonyms {#synonyms}

Finds synonyms for a given word. There are two types of synonym extensions: `plain` and `wordnet`.

For the `plain` extension to work, you need to provide a path to a simple text file, where each line corresponds to one set of synonyms. The words on a line must be separated by spaces or tab characters.

For the `wordnet` extension to work, you need to provide a path to a WordNet thesaurus. The thesaurus must contain a WordNet sense index.

**Syntax**

``` sql
synonyms('extension_name', word)
```

**Arguments**

- `extension_name` — Name of the extension in which the search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word to look up in the extension. [String](../../sql-reference/data-types/string.md#string).

**Examples**

Query:

``` sql
SELECT synonyms('list', 'important');
```

Result:

``` text
┌─synonyms('list', 'important')────────────┐
│ ['important','big','critical','crucial'] │
└──────────────────────────────────────────┘
```

Configuration:

``` xml
<synonyms_extensions>
    <extension>
        <name>en</name>
        <type>plain</type>
        <path>en.txt</path>
    </extension>
    <extension>
        <name>en</name>
        <type>wordnet</type>
        <path>en/</path>
    </extension>
</synonyms_extensions>
```
@ -146,6 +146,70 @@ SELECT splitByRegexp('', 'abcde');

└────────────────────────────┘
```

## splitByWhitespace(s) {#splitbywhitespaceseparator-s}

Splits a string into substrings, using whitespace characters as separators.

**Syntax**

``` sql
splitByWhitespace(s)
```

**Arguments**

- `s` — The string to split. [String](../../sql-reference/data-types/string.md).

**Returned value(s)**

Returns an array of substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

**Example**

``` sql
SELECT splitByWhitespace('  1!  a,  b.  ');
```

``` text
┌─splitByWhitespace('  1!  a,  b.  ')─┐
│ ['1!','a,','b.']                    │
└─────────────────────────────────────┘
```

## splitByNonAlpha(s) {#splitbynonalphaseparator-s}

Splits a string into substrings, using whitespace and punctuation characters as separators.

**Syntax**

``` sql
splitByNonAlpha(s)
```

**Arguments**

- `s` — The string to split. [String](../../sql-reference/data-types/string.md).

**Returned value(s)**

Returns an array of substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

**Example**

``` sql
SELECT splitByNonAlpha('  1!  a,  b.  ');
```

``` text
┌─splitByNonAlpha('  1!  a,  b.  ')─┐
│ ['1','a','b']                     │
└───────────────────────────────────┘
```

## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}

Concatenates the strings listed in the array with the separator.
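An illustrative call of this function; if the separator is omitted, the strings are concatenated without one:

``` sql
SELECT arrayStringConcat(['a', 'b', 'c'], '-');  -- returns 'a-b-c'
```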
@ -5,15 +5,26 @@ toc_title: "Column manipulations"

# Column Manipulations {#manipuliatsii-so-stolbtsami}

A set of actions that let you change the table structure.

Syntax:

``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ...
```

Several actions on one table can be specified in a single query, separated by commas (see the sketch after the list below).
Each action is an operation on a column.

The following actions exist:

- [ADD COLUMN](#alter_add-column) — adds a column to the table;
- [DROP COLUMN](#alter_drop-column) — deletes a column;
- [RENAME COLUMN](#alter_rename-column) — renames an existing column.
- [CLEAR COLUMN](#alter_clear-column) — resets all values in a column for the given partition;
- [COMMENT COLUMN](#alter_comment-column) — adds a comment to a column;
- [MODIFY COLUMN](#alter_modify-column) — changes the column type, the default value expression, and the TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — removes one of the column properties.
- [RENAME COLUMN](#alter_rename-column) — renames an existing column.

A detailed description of each action is given below.
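A hedged sketch of several comma-separated actions in one query, as the syntax above allows; the table and column names are illustrative:

``` sql
ALTER TABLE visits
    ADD COLUMN browser String,
    COMMENT COLUMN browser 'The browser the visit came from',
    DROP COLUMN obsolete_flag;
```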
@ -72,6 +83,22 @@ DROP COLUMN [IF EXISTS] name

ALTER TABLE visits DROP COLUMN browser
```

## RENAME COLUMN {#alter_rename-column}

``` sql
RENAME COLUMN [IF EXISTS] name to new_name
```

Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query does not return an error if the column `name` does not exist. Since renaming does not touch the column's physical data, the query completes almost instantly.

**NOTE**: Columns that are part of the primary key or the sorting key (specified with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to rename such columns produces `SQL Error [524]`.

Example:

``` sql
ALTER TABLE visits RENAME COLUMN webBrowser TO browser
```

## CLEAR COLUMN {#alter_clear-column}

``` sql
@ -109,7 +136,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The column shows

## MODIFY COLUMN {#alter_modify-column}

``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```

The query changes the following properties of the column `name`:
@ -118,11 +145,15 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | F

- Default value

- Compression codecs

- TTL

For examples of changing a column's TTL, see [Column TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
For examples of changing compression codecs, see [Column compression codecs](../create/table.md#codecs).

If `IF EXISTS` is specified, the query does not return an error if the column does not exist.
For examples of changing a column's TTL, see [Column TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).

If `IF EXISTS` is specified, the query does not return an error if the column does not exist.

The query can also change the order of columns using `FIRST | AFTER`; see the description of [ADD COLUMN](#alter_add-column).
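A hedged example combining several of the properties listed above in a single query, following the syntax shown; the table name and the `event_date` column it relies on are illustrative assumptions:

``` sql
ALTER TABLE visits
    MODIFY COLUMN browser String DEFAULT 'unknown' CODEC(ZSTD(1)) TTL event_date + INTERVAL 30 DAY;
```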
@ -162,22 +193,6 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;

- [REMOVE TTL](ttl.md).

## RENAME COLUMN {#alter_rename-column}

Renames an existing column.

Syntax:

```sql
ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name
```

**Example**

```sql
ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
```

## ALTER Query Limitations {#ogranicheniia-zaprosa-alter}

The `ALTER` query lets you create and delete individual elements (columns) of nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)` - a nested data structure is fully equivalent to multiple array columns whose names share the same prefix up to the dot (see the sketch below).
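An illustrative sketch of adding the elements of a nested structure as described above; the table and column names are hypothetical:

``` sql
ALTER TABLE visits ADD COLUMN goals.id Array(UInt32);
ALTER TABLE visits ADD COLUMN goals.price Array(Float64);
```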
@ -186,7 +201,6 @@ ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;

If the capabilities of the `ALTER` query are not enough for the table change you need, you can create a new table, copy the data into it with an [INSERT SELECT](../insert-into.md#insert_query_insert-select) query, swap the tables with a [RENAME](../misc.md#misc_operations-rename) query, and delete the old table. As an alternative to the `INSERT SELECT` query, you can use the [clickhouse-copier](../../../sql-reference/statements/alter/index.md) tool.

The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` was running at the moment of the `ALTER` query, the `ALTER` query will first wait for it to complete. And during this time, all new queries to the same table will wait while this `ALTER` is running.
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` was running at the moment of the `ALTER` query, the `ALTER` query first waits for it to complete, and during this time all new queries to the same table wait while this `ALTER` is running.

For tables that do not store data themselves (such as [Merge](../../../sql-reference/statements/alter/index.md) and [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` only changes the table structure and does not change the structure of the underlying tables. For example, when running `ALTER` for a `Distributed` table, you also need to run `ALTER` for the tables on all remote servers.
60
docs/ru/sql-reference/statements/alter/setting.md
Normal file
60
docs/ru/sql-reference/statements/alter/setting.md
Normal file
@ -0,0 +1,60 @@

---
toc_priority: 38
toc_title: SETTING
---

# Table Settings Manipulations {#table_settings_manipulations}

There are queries that change table settings or reset them to their default values. Several settings can be changed in a single query.
If a setting with the specified name does not exist, an exception is thrown.

**Syntax**

``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```

!!! note "Note"
    These queries can only be applied to tables of the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family.

## MODIFY SETTING {#alter_modify_setting}

Changes table settings.

**Syntax**

```sql
MODIFY SETTING setting_name=value [, ...]
```

**Example**

```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id;

ALTER TABLE example_table MODIFY SETTING max_part_loading_threads=8, max_parts_in_total=50000;
```

## RESET SETTING {#alter_reset_setting}

Resets table settings to their default values. If a setting is already at its default value, no action is taken.

**Syntax**

```sql
RESET SETTING setting_name [, ...]
```

**Example**

```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id
SETTINGS max_part_loading_threads=8;

ALTER TABLE example_table RESET SETTING max_part_loading_threads;
```

**See also**

- [MergeTree table settings](../../../operations/settings/merge-tree-settings.md)
@ -62,7 +62,7 @@ CREATE TABLE example (

    materialized_value UInt32 MATERIALIZED 12345,
    aliased_value UInt32 ALIAS 2,
    PRIMARY KEY primary_key
) ENGINE=MergeTree
) ENGINE=MergeTree
PARTITION BY partition_key
ORDER BY (primary_key, secondary_key);
```
@ -271,8 +271,8 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';

This modifier can also be combined with the [LIMIT ... WITH TIES](../../../sql-reference/statements/select/limit.md#limit-with-ties) modifier.

The `WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missing values in the `expr` column are filled with values corresponding to the presumed sequence of column values; other columns are filled with their default values.
The `WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missing values in the `expr` column are filled with values corresponding to the presumed sequence of column values, and other columns are filled with their default values.

Use the following construction to fill several columns with the `WITH FILL` modifier, with optional parameters after each field name in the `ORDER BY` section.

@ -280,22 +280,22 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';

ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```

`WITH FILL` can be applied only to fields of numeric types (all kinds of float, int, decimal) or temporal types (all kinds of Date, DateTime).
`WITH FILL` can be applied to fields of numeric types (all kinds of float, int, decimal) or temporal types (all kinds of Date, DateTime). When applied to fields of the `String` type, missing values are filled with empty strings.
When `FROM const_expr` is not defined, the fill sequence uses the minimum value of the `expr` field from `ORDER BY`.
When `TO const_expr` is not defined, the fill sequence uses the maximum value of the `expr` field from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted "as is" for numeric types, as "days" for the `Date` type, and as "seconds" for the `DateTime` type.

When `STEP const_numeric_expr` is not specified, `1.0` is used for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.

For example, the following query
An example query without `WITH FILL`:
```sql
SELECT n, source FROM (
   SELECT toFloat32(number % 10) AS n, 'original' AS source
   FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```

returns
Result:
```text
┌─n─┬─source───┐
│ 1 │ original │
@ -304,7 +304,7 @@ SELECT n, source FROM (
└───┴──────────┘
```

but after applying the `WITH FILL` modifier
The same query after applying the `WITH FILL` modifier:
```sql
SELECT n, source FROM (
   SELECT toFloat32(number % 10) AS n, 'original' AS source
@ -312,7 +312,8 @@ SELECT n, source FROM (
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```

returns
Result:

```text
┌───n─┬─source───┐
│   0 │          │
@ -331,7 +332,7 @@ SELECT n, source FROM (
└─────┴──────────┘
```

For the case when we have several fields `ORDER BY field2 WITH FILL, field1 WITH FILL`, the fill order will follow the order of the fields in the `ORDER BY` clause.
For the case with several fields `ORDER BY field2 WITH FILL, field1 WITH FILL`, the fill order corresponds to the order of the fields in the `ORDER BY` clause.

Example:
```sql
@ -346,7 +347,7 @@ ORDER BY
    d1 WITH FILL STEP 5;
```

returns
Result:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
@ -359,9 +360,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```

The `d1` field is not filled and uses the default value, because we have no repeated values for `d2`, so we cannot correctly calculate the fill sequence for `d1`.
The `d1` field is not filled and uses the default value. Since we have no repeated values for `d2`, we cannot correctly calculate the fill sequence for `d1`.

The following query (with the order changed in ORDER BY)
The following query (with the order changed in ORDER BY):
```sql
SELECT
    toDate((number * 10) * 86400) AS d1,
@ -374,7 +375,7 @@ ORDER BY
    d2 WITH FILL;
```

returns
Result:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
@ -43,7 +43,7 @@ Yandex does **not** maintain the libraries listed below and hasn't done any extensive testing

- Monitoring
    - [Graphite](https://graphiteapp.org)
        - [graphouse](https://github.com/yandex/graphouse)
        - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
        - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse)
        - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
        - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied
    - [Grafana](https://grafana.com/)

@ -6,4 +6,3 @@ toc_priority: 104

Selects the last value encountered.
Its result is just as indeterminate as that of the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
@ -11,7 +11,6 @@ set (CLICKHOUSE_COPIER_LINK

    clickhouse_functions
    clickhouse_table_functions
    clickhouse_aggregate_functions
    clickhouse_dictionaries
    string_utils

    PUBLIC
@ -33,7 +33,7 @@ static std::string extractFromConfig(

    {
        DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml));
        zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>(
            *bootstrap_configuration, "zookeeper");
            *bootstrap_configuration, "zookeeper", nullptr);
        zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; });
        config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache);
    }
@ -181,15 +181,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe

    }
    else if (method == "loadIds")
    {
        params.read(request.getStream());
        String ids_string;
        readString(ids_string, request.getStream());
        std::vector<uint64_t> ids = parseIdsFromBinary(ids_string);

        if (!params.has("ids"))
        {
            processError(response, "No 'ids' in request URL");
            return;
        }

        std::vector<uint64_t> ids = parseIdsFromBinary(params.get("ids"));
        auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
        const auto & sample_block = library_handler->getSampleBlock();
        auto input = library_handler->loadIds(ids);
@ -6,6 +6,7 @@ namespace DB

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
}

@ -17,7 +18,7 @@ SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dic

    if (library_handler != library_handlers.end())
        return library_handler->second;

    return nullptr;
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found dictionary with id: {}", dictionary_id);
}
@ -6,7 +6,6 @@ set (CLICKHOUSE_LOCAL_LINK

    clickhouse_aggregate_functions
    clickhouse_common_config
    clickhouse_common_io
    clickhouse_dictionaries
    clickhouse_functions
    clickhouse_parsers
    clickhouse_storages_system
@ -322,7 +322,7 @@ struct Checker

    {
        checkRequiredInstructions();
    }
} checker;
} checker __attribute__((init_priority(101))); /// Run before other static initializers.

}
@ -13,7 +13,6 @@ set (CLICKHOUSE_SERVER_LINK

    clickhouse_common_config
    clickhouse_common_io
    clickhouse_common_zookeeper
    clickhouse_dictionaries
    clickhouse_functions
    clickhouse_parsers
    clickhouse_storages_system
@ -26,6 +26,7 @@

#include <Common/DNSResolver.h>
#include <Common/CurrentMetrics.h>
#include <Common/Macros.h>
#include <Common/ShellCommand.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>

@ -39,6 +40,7 @@

#include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>

@ -95,6 +97,9 @@

#endif

#if USE_SSL
#    if USE_INTERNAL_SSL_LIBRARY
#        include <Compression/CompressionCodecEncrypted.h>
#    endif
#    include <Poco/Net/Context.h>
#    include <Poco/Net/SecureServerSocket.h>
#endif

@ -107,6 +112,10 @@

#    include <Server/KeeperTCPHandlerFactory.h>
#endif

#if USE_BASE64
#    include <turbob64.h>
#endif

#if USE_JEMALLOC
#    include <jemalloc/jemalloc.h>
#endif
@ -242,6 +251,7 @@ namespace ErrorCodes

    extern const int SUPPORT_IS_DISABLED;
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
    extern const int INCORRECT_DATA;
    extern const int INVALID_CONFIG_PARAMETER;
    extern const int SYSTEM_ERROR;
    extern const int FAILED_TO_GETPWUID;
@ -445,6 +455,39 @@ void checkForUsersNotInMainConfig(

    }
}

static void loadEncryptionKey(const std::string & key_command [[maybe_unused]], Poco::Logger * log)
{
#if USE_BASE64 && USE_SSL && USE_INTERNAL_SSL_LIBRARY

    auto process = ShellCommand::execute(key_command);

    std::string b64_key;
    readStringUntilEOF(b64_key, process->out);
    process->wait();

    // turbob64 doesn't like whitespace characters in input. Strip
    // them before decoding.
    std::erase_if(b64_key, [](char c)
    {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    });

    std::vector<char> buf(b64_key.size());
    const size_t key_size = tb64dec(reinterpret_cast<const unsigned char *>(b64_key.data()), b64_key.size(),
                                    reinterpret_cast<unsigned char *>(buf.data()));
    if (!key_size)
        throw Exception("Failed to decode encryption key", ErrorCodes::INCORRECT_DATA);
    else if (key_size < 16)
        LOG_WARNING(log, "The encryption key should be at least 16 octets long.");

    const std::string_view key = std::string_view(buf.data(), key_size);
    CompressionCodecEncrypted::setMasterKey(key);

#else
    LOG_WARNING(log, "Server was built without Base64 or SSL support. Encryption is disabled.");
#endif
}


[[noreturn]] void forceShutdown()
{
@ -916,6 +959,10 @@ if (ThreadFuzzer::instance().isEffective())

    global_context->getMergeTreeSettings().sanityCheck(settings);
    global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);

    /// Set up encryption.
    if (config().has("encryption.key_command"))
        loadEncryptionKey(config().getString("encryption.key_command"), log);

    Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);

    Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));

@ -1047,6 +1094,7 @@ if (ThreadFuzzer::instance().isEffective())

    loadMetadataSystem(global_context);
    /// After attaching system databases we can initialize system log.
    global_context->initializeSystemLogs();
    global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
    auto & database_catalog = DatabaseCatalog::instance();
    /// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
    attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);
@ -1002,6 +1002,16 @@

    </compression>
    -->

    <!-- Configuration of encryption. The server executes a command to
         obtain an encryption key at startup if such a command is
         defined, or encryption codecs will be disabled otherwise. The
         command is executed through /bin/sh and is expected to write
         a Base64-encoded key to the stdout. -->
    <encryption>
        <!-- <key_command>/usr/bin/systemd-ask-password --id="clickhouse-server" --timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command> -->
        <!-- <key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command> -->
    </encryption>

    <!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
         Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->
    <distributed_ddl>

@ -1156,4 +1166,27 @@

    <!-- Uncomment to disable ClickHouse internal DNS caching. -->
    <!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> -->

    <!-- You can also configure rocksdb like this: -->
    <!--
    <rocksdb>
        <options>
            <max_background_jobs>8</max_background_jobs>
        </options>
        <column_family_options>
            <num_levels>2</num_levels>
        </column_family_options>
        <tables>
            <table>
                <name>TABLE</name>
                <options>
                    <max_background_jobs>8</max_background_jobs>
                </options>
                <column_family_options>
                    <num_levels>2</num_levels>
                </column_family_options>
            </table>
        </tables>
    </rocksdb>
    -->
</yandex>
@ -11,7 +11,7 @@ bool User::equal(const IAccessEntity & other) const

    const auto & other_user = typeid_cast<const User &>(other);
    return (authentication == other_user.authentication) && (allowed_client_hosts == other_user.allowed_client_hosts)
        && (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles)
        && (settings == other_user.settings) && (grantees == other_user.grantees);
        && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database);
}

}
@ -22,6 +22,7 @@ struct User : public IAccessEntity

    RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{};
    SettingsProfileElements settings;
    RolesOrUsersSet grantees = RolesOrUsersSet::AllTag{};
    String default_database;

    bool equal(const IAccessEntity & other) const override;
    std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<User>(); }
@ -196,6 +196,9 @@ namespace

        user->access.revokeGrantOption(AccessType::ALL);
    }

    String default_database = config.getString(user_config + ".default_database", "");
    user->default_database = default_database;

    return user;
}
@ -43,6 +43,7 @@ SRCS(

    SettingsProfile.cpp
    SettingsProfileElement.cpp
    SettingsProfilesCache.cpp
    SettingsProfilesInfo.cpp
    User.cpp
    UsersConfigAccessStorage.cpp
    tests/gtest_access_rights_ops.cpp
@ -43,9 +43,9 @@ public:

        const AggregateFunctionPtr & nested_function,
        const AggregateFunctionProperties &,
        const DataTypes & arguments,
        const Array &) const override
        const Array & params) const override
    {
        return std::make_shared<AggregateFunctionArray>(nested_function, arguments);
        return std::make_shared<AggregateFunctionArray>(nested_function, arguments, params);
    }
};
@ -29,10 +29,11 @@ private:

    size_t num_arguments;

public:
    AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments)
        : IAggregateFunctionHelper<AggregateFunctionArray>(arguments, {})
    AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_)
        : IAggregateFunctionHelper<AggregateFunctionArray>(arguments, params_)
        , nested_func(nested_), num_arguments(arguments.size())
    {
        assert(parameters == nested_func->getParameters());
        for (const auto & type : arguments)
            if (!isArray(type))
                throw Exception("All arguments for aggregate function " + getName() + " must be arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -34,14 +34,14 @@ public:

        const AggregateFunctionPtr & nested_function,
        const AggregateFunctionProperties &,
        const DataTypes & arguments,
        const Array &) const override
        const Array & params) const override
    {
        AggregateFunctionPtr res;
        if (arguments.size() == 1)
        {
            res.reset(createWithNumericType<
                AggregateFunctionDistinct,
                AggregateFunctionDistinctSingleNumericData>(*arguments[0], nested_function, arguments));
                AggregateFunctionDistinctSingleNumericData>(*arguments[0], nested_function, arguments, params));

            if (res)
                return res;

@ -49,14 +49,14 @@ public:

            if (arguments[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
                return std::make_shared<
                    AggregateFunctionDistinct<
                        AggregateFunctionDistinctSingleGenericData<true>>>(nested_function, arguments);
                        AggregateFunctionDistinctSingleGenericData<true>>>(nested_function, arguments, params);
            else
                return std::make_shared<
                    AggregateFunctionDistinct<
                        AggregateFunctionDistinctSingleGenericData<false>>>(nested_function, arguments);
                        AggregateFunctionDistinctSingleGenericData<false>>>(nested_function, arguments, params);
        }

        return std::make_shared<AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>>(nested_function, arguments);
        return std::make_shared<AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>>(nested_function, arguments, params);
    }
};
Some files were not shown because too many files have changed in this diff