Merge remote-tracking branch 'origin/master' into parallel-replicas-not-enough-replicas

This commit is contained in:
Igor Nikonov 2023-09-18 15:29:56 +00:00
commit 047d214436
598 changed files with 12796 additions and 4543 deletions

View File

@ -79,7 +79,7 @@ IndentWidth: 4
IndentWrappedFunctionNames: false
MacroBlockBegin: ''
MacroBlockEnd: ''
NamespaceIndentation: Inner
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
@ -89,6 +89,7 @@ PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
RemoveBracesLLVM: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements

45
.github/workflows/auto_release.yml vendored Normal file
View File

@ -0,0 +1,45 @@
name: AutoRelease
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
concurrency:
group: auto-release
on: # yamllint disable-line rule:truthy
# schedule:
# - cron: '0 10-16 * * 1-5'
workflow_dispatch:
jobs:
CherryPick:
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Auto-release
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"

View File

@ -47,7 +47,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
set (PARALLEL_LINK_JOBS 2)
endif()
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})

2
contrib/abseil-cpp vendored

@ -1 +1 @@
Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
Subproject commit 5655528c41830f733160de4fb0b99073841bae9e

View File

@ -1,5 +1,5 @@
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(BUILD_TESTING OFF)
set(ABSL_PROPAGATE_CXX_STD ON)
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")

2
contrib/re2 vendored

@ -1 +1 @@
Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4
Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c

View File

@ -1,14 +1,3 @@
# Copyright 2015 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# This file was edited for ClickHouse
string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space)
if(_have_space GREATER 0)
message(FATAL_ERROR "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.")
endif()
set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2")
set(RE2_SOURCES
@ -35,33 +24,9 @@ set(RE2_SOURCES
${SRC_DIR}/util/rune.cc
${SRC_DIR}/util/strutil.cc
)
add_library(re2 ${RE2_SOURCES})
target_include_directories(re2 PUBLIC "${SRC_DIR}")
target_link_libraries(re2 ch_contrib::abseil_str_format)
# Building re2 which is thread-safe and re2_st which is not.
# re2 changes its state during matching of regular expression, e.g. creates temporary DFA.
# It uses RWLock to process the same regular expression object from different threads.
# In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st).
add_library(_re2 ${RE2_SOURCES})
target_include_directories(_re2 PUBLIC "${SRC_DIR}")
target_link_libraries(_re2 ch_contrib::abseil_str_format)
add_library(re2_st ${RE2_SOURCES})
target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
target_include_directories (re2_st PRIVATE .)
target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR})
target_link_libraries (re2_st ch_contrib::abseil_str_format)
file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h)
add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/re2/${FILENAME}"
-DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake"
COMMENT "Creating ${FILENAME} for re2_st library.")
add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}")
add_dependencies (re2_st transform_${FILENAME})
endforeach ()
# NOTE: you should not change name of library here, since it is used to generate required header (see above)
add_library(ch_contrib::re2 ALIAS re2)
add_library(ch_contrib::re2_st ALIAS re2_st)
add_library(ch_contrib::re2 ALIAS _re2)

View File

@ -1,10 +0,0 @@
file (READ ${SOURCE_FILENAME} CONTENT)
string (REGEX REPLACE "using re2::RE2;" "" CONTENT "${CONTENT}")
string (REGEX REPLACE "using re2::LazyRE2;" "" CONTENT "${CONTENT}")
string (REGEX REPLACE "namespace re2 {" "namespace re2_st {" CONTENT "${CONTENT}")
string (REGEX REPLACE "re2::" "re2_st::" CONTENT "${CONTENT}")
string (REGEX REPLACE "\"re2/" "\"re2_st/" CONTENT "${CONTENT}")
string (REGEX REPLACE "(.\\*?_H)" "\\1_ST" CONTENT "${CONTENT}")
string (REGEX REPLACE "#define MUTEX_IS_PTHREAD_RWLOCK" "#undef MUTEX_IS_PTHREAD_RWLOCK" CONTENT "${CONTENT}")
string (REGEX REPLACE "typedef std::mutex MutexType;" "struct MutexType { void lock() {} void unlock() {} };" CONTENT "${CONTENT}")
file (WRITE ${TARGET_FILENAME} "${CONTENT}")

2
contrib/s2geometry vendored

@ -1 +1 @@
Subproject commit 4a7ebd5da04cb6c9ea38bbf5914a9f8f3c768564
Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b

View File

@ -7,12 +7,6 @@ endif()
set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")
set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()
set(S2_SRCS
"${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
"${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
@ -58,7 +52,9 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
"${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
"${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
"${S2_SOURCE_DIR}/s2/s2error.cc"
"${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
"${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
"${S2_SOURCE_DIR}/s2/s2latlng.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
@ -93,59 +89,58 @@ set(S2_SRCS
"${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
"${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc"
"${S2_SOURCE_DIR}/s2/s2shape_measures.cc"
"${S2_SOURCE_DIR}/s2/s2shape_nesting_query.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
"${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
"${S2_SOURCE_DIR}/s2/s2text_format.cc"
"${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
"${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
"${S2_SOURCE_DIR}/s2/strings/serialize.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
"${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
"${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
"${S2_SOURCE_DIR}/s2/util/coding/varint.cc"
"${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
"${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
"${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
)
add_library(_s2 ${S2_SRCS})
add_library(ch_contrib::s2 ALIAS _s2)
set_property(TARGET _s2 PROPERTY CXX_STANDARD 17)
if (TARGET OpenSSL::SSL)
target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()
# Copied from contrib/s2geometry/CMakeLists
target_link_libraries(_s2 PRIVATE
absl::base
absl::btree
absl::config
absl::core_headers
absl::dynamic_annotations
absl::endian
absl::fixed_array
absl::flat_hash_map
absl::flat_hash_set
absl::hash
absl::inlined_vector
absl::int128
absl::log_severity
absl::memory
absl::span
absl::str_format
absl::strings
absl::type_traits
absl::utility
)
absl::base
absl::btree
absl::check
absl::config
absl::core_headers
absl::dynamic_annotations
absl::endian
absl::fixed_array
absl::flags
absl::flat_hash_map
absl::flat_hash_set
absl::hash
absl::inlined_vector
absl::int128
absl::log
absl::log_severity
absl::memory
absl::span
absl::status
absl::str_format
absl::strings
absl::type_traits
absl::utility
)
target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb
Subproject commit b5fcabb24d28fc33024291b2c6c1abd807c7dba8

2
contrib/usearch vendored

@ -1 +1 @@
Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba
Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f

View File

@ -325,7 +325,6 @@ def parse_env_variables(
if additional_pkgs:
# NOTE: These are the envs for the packages/build script
result.append("MAKE_APK=true")
result.append("MAKE_RPM=true")
result.append("MAKE_TGZ=true")

View File

@ -18,6 +18,7 @@ RUN apt-get update \
python3-termcolor \
unixodbc \
pv \
jq \
zstd \
--yes --no-install-recommends

View File

@ -1,21 +1,15 @@
# docker build -t clickhouse/mysql-java-client .
# MySQL Java client docker container
FROM ubuntu:18.04
FROM openjdk:8-jdk-alpine
RUN apt-get update && \
apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl
RUN apk --no-cache add curl
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
RUN apt-get clean
ARG ver=5.1.46
RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar
ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar
ARG ver=8.1.0
RUN curl -L -o /mysql-connector-j-${ver}.jar https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/${ver}/mysql-connector-j-${ver}.jar
ENV CLASSPATH=$CLASSPATH:/mysql-connector-j-${ver}.jar
WORKDIR /jdbc
COPY Test.java Test.java
RUN javac Test.java
COPY PreparedStatementsTest.java PreparedStatementsTest.java
RUN javac Test.java PreparedStatementsTest.java

View File

@ -0,0 +1,193 @@
import com.mysql.cj.MysqlType;
import java.sql.*;
public class PreparedStatementsTest {
public static void main(String[] args) {
int i = 0;
String host = "127.0.0.1";
String port = "9004";
String user = "default";
String password = "";
String database = "default";
while (i < args.length) {
switch (args[i]) {
case "--host":
host = args[++i];
break;
case "--port":
port = args[++i];
break;
case "--user":
user = args[++i];
break;
case "--password":
password = args[++i];
break;
case "--database":
database = args[++i];
break;
default:
i++;
break;
}
}
// useServerPrepStmts uses COM_STMT_PREPARE and COM_STMT_EXECUTE
// instead of COM_QUERY which allows us to test the binary protocol
String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=true",
host, port, database);
try {
Class.forName("com.mysql.cj.jdbc.Driver");
Connection conn = DriverManager.getConnection(jdbcUrl, user, password);
testSimpleDataTypes(conn);
testStringTypes(conn);
testLowCardinalityAndNullableTypes(conn);
testDecimalTypes(conn);
testMiscTypes(conn);
testDateTypes(conn);
testUnusualDateTime64Scales(conn);
testDateTimeTimezones(conn);
conn.close();
} catch (Exception e) {
e.printStackTrace();
System.exit(1);
}
}
private static void testSimpleDataTypes(Connection conn) throws SQLException {
System.out.println("### testSimpleDataTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_simple_data_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i8"), rs.getInt("i8"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i16"), rs.getInt("i16"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i32"), rs.getInt("i32"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "i64"), rs.getLong("i64"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "i128"), rs.getString("i128"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "i256"), rs.getString("i256"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui8"), rs.getInt("ui8"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui16"), rs.getInt("ui16"));
System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui32"), rs.getLong("ui32"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui64"), rs.getString("ui64"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui128"), rs.getString("ui128"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui256"), rs.getString("ui256"));
System.out.printf("%s, value: %f\n", getMysqlType(rs, "f32"), rs.getFloat("f32"));
System.out.printf("%s, value: %f\n", getMysqlType(rs, "f64"), rs.getFloat("f64"));
System.out.printf("%s, value: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b"));
}
System.out.println();
}
private static void testStringTypes(Connection conn) throws SQLException {
System.out.println("### testStringTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_string_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "s"), rs.getString("s"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "sn"), rs.getString("sn"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "lc"), rs.getString("lc"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "nlc"), rs.getString("nlc"));
}
System.out.println();
}
private static void testLowCardinalityAndNullableTypes(Connection conn) throws SQLException {
System.out.println("### testLowCardinalityAndNullableTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_low_cardinality_and_nullable_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ilc"), rs.getInt("ilc"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dlc"), rs.getDate("dlc"));
// NULL int is represented as zero
System.out.printf("%s, value: %s\n", getMysqlType(rs, "ni"), rs.getInt("ni"));
}
System.out.println();
}
private static void testDecimalTypes(Connection conn) throws SQLException {
System.out.println("### testDecimalTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_decimal_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString());
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString());
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_native"),
rs.getBigDecimal("d128_native").toPlainString());
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d256"), rs.getString("d256"));
}
System.out.println();
}
private static void testDateTypes(Connection conn) throws SQLException {
System.out.println("### testDateTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_date_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getDate("d32"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_6"), rs.getTimestamp("dt64_6"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9"));
}
System.out.println();
}
private static void testUnusualDateTime64Scales(Connection conn) throws SQLException {
System.out.println("### testUnusualDateTime64Scales");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_unusual_datetime64_scales").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8"));
}
System.out.println();
}
private static void testDateTimeTimezones(Connection conn) throws SQLException {
System.out.println("### testDateTimeTimezones");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_datetime_timezones").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
}
System.out.println();
}
private static void testMiscTypes(Connection conn) throws SQLException {
System.out.println("### testMiscTypes");
ResultSet rs = conn.prepareStatement("SELECT * FROM ps_misc_types").executeQuery();
int rowNum = 1;
while (rs.next()) {
System.out.printf("Row #%d\n", rowNum++);
System.out.printf("%s, value: %s\n", getMysqlType(rs, "a"), rs.getString("a"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "u"), rs.getString("u"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "t"), rs.getString("t"));
System.out.printf("%s, value: %s\n", getMysqlType(rs, "m"), rs.getString("m"));
}
System.out.println();
}
private static String getMysqlType(ResultSet rs, String columnLabel) throws SQLException {
ResultSetMetaData meta = rs.getMetaData();
return String.format("%s type is %s", columnLabel,
MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel))));
}
}

View File

@ -46,6 +46,7 @@ class JavaConnectorTest {
Connection conn = null;
Statement stmt = null;
try {
Class.forName("com.mysql.cj.jdbc.Driver");
conn = DriverManager.getConnection(jdbcUrl, user, password);
stmt = conn.createStatement();
stmt.executeUpdate(CREATE_TABLE_SQL);
@ -69,7 +70,7 @@ class JavaConnectorTest {
stmt.close();
conn.close();
} catch (SQLException e) {
} catch (Exception e) {
e.printStackTrace();
System.exit(1);
}

View File

@ -3,4 +3,4 @@ services:
java1:
image: clickhouse/mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest}
# to keep container running
command: sleep infinity
command: sleep 1d

View File

@ -394,7 +394,7 @@ do
done
# for each query run, prepare array of metrics from query log
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
@ -551,7 +551,7 @@ numactl --cpunodebind=all --membind=all numactl --show
# If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs.
numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "
clickhouse-local --multiquery --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
@ -649,7 +649,7 @@ rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.ts
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
@ -950,7 +950,7 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
for version in {right,left}
do
rm -rf data
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view query_profiles as
with 0 as left, 1 as right
select * from file('analyze/query-profiles.tsv', TSV,
@ -1120,7 +1120,7 @@ function report_metrics
rm -rf metrics ||:
mkdir metrics
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
;
@ -1180,7 +1180,7 @@ function upload_results
# Prepare info for the CI checks table.
rm -f ci-checks.tsv
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes);
create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')

View File

@ -40,6 +40,7 @@ RUN apt-get update -y \
cargo \
zstd \
file \
jq \
pv \
zip \
p7zip-full \
@ -87,5 +88,10 @@ RUN npm install -g azurite \
COPY run.sh /
COPY setup_minio.sh /
COPY setup_hdfs_minicluster.sh /
COPY attach_gdb.lib /
COPY utils.lib /
# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests
COPY stress_tests.lib /
CMD ["/bin/bash", "/run.sh"]

View File

@ -1,6 +1,6 @@
#!/bin/bash
source /usr/share/clickhouse-test/ci/utils.lib
source /utils.lib
function attach_gdb_to_clickhouse()
{

View File

@ -22,10 +22,10 @@ dpkg -i package_folder/clickhouse-client_*.deb
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# shellcheck disable=SC1091
source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
source /attach_gdb.lib
# shellcheck disable=SC1091
source /usr/share/clickhouse-test/ci/utils.lib || true # FIXME: to not break old builds, clean on 2023-09-01
source /utils.lib
# install test configs
/usr/share/clickhouse-test/config/install.sh

View File

@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
source /attach_gdb.lib
source /stress_tests.lib
install_packages package_folder

View File

@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
source /attach_gdb.lib
source /stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
@ -60,6 +60,12 @@ install_packages previous_release_package_folder
# available for dump via clickhouse-local
configure
# async_replication setting doesn't exist on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "/<async_replication>1<\/async_replication>/d" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
@ -82,6 +88,12 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
# async_replication setting doesn't exist on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "/<async_replication>1<\/async_replication>/d" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \

View File

@ -37,6 +37,8 @@ When creating a new replica of the database, this replica creates tables by itse
[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part on the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
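For illustration, a hedged sketch of the partition behaviour described above; the database, table, and partition value are hypothetical, and the partition key is assumed to be `toYYYYMM(event_date)`:

```sql
-- ATTACH itself runs only on the current replica (it is not replicated by the
-- database engine); the attached data is then replicated by the table's
-- Replicated*MergeTree engine.
ALTER TABLE my_replicated_db.events ATTACH PARTITION 202309;
```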
If you only need to configure a cluster without maintaining table replication, refer to the [Cluster Discovery](../../operations/cluster-discovery.md) feature.
## Usage Example {#usage-example}
Creating a cluster with three hosts:

View File

@ -252,7 +252,7 @@ CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
@ -265,7 +265,7 @@ CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Tuple(Float32[, Float32[, ...]]),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
@ -277,5 +277,8 @@ USearch currently supports two distance functions:
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
was specified during index creation, `f16` is used as default.
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.
distance function was specified during index creation, `L2Distance` is used as default.
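As a hedged, concrete version of the syntax shown above, the following statement specifies both the distance function and the scalar kind explicitly (table and index names are hypothetical):

```sql
-- Illustrative only: an usearch index using cosine distance and storing
-- vectors in reduced-precision f16.
CREATE TABLE table_with_usearch_index
(
    id Int64,
    vectors Array(Float32),
    INDEX vectors_idx vectors TYPE usearch('cosineDistance', 'f16') GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```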

View File

@ -1139,6 +1139,8 @@ Optional parameters:
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
### Configuring the cache
@ -1220,7 +1222,6 @@ Configuration markup:
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
@ -1248,9 +1249,9 @@ Limit parameters (mainly for internal usage):
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).

View File

@ -58,7 +58,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334,"peak_memory_usage":"0"}
1
```
@ -288,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
Possible header fields:
@ -439,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -604,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -644,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -696,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -715,7 +715,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -0,0 +1,171 @@
---
slug: /en/operations/cluster-discovery
sidebar_label: Cluster Discovery
---
# Cluster Discovery
## Overview
ClickHouse's Cluster Discovery feature simplifies cluster configuration by allowing nodes to automatically discover and register themselves without the need for explicit definition in the configuration files. This is especially beneficial in cases where the manual definition of each node becomes cumbersome.
:::note
Cluster Discovery is an experimental feature and can be changed or removed in future versions.
To enable it, include the `allow_experimental_cluster_discovery` setting in your configuration file:
```xml
<clickhouse>
<!-- ... -->
<allow_experimental_cluster_discovery>1</allow_experimental_cluster_discovery>
<!-- ... -->
</clickhouse>
```
:::
## Remote Servers Configuration
### Traditional Manual Configuration
Traditionally, in ClickHouse, each shard and replica in the cluster needed to be manually specified in the configuration:
```xml
<remote_servers>
<cluster_name>
<shard>
<replica>
<host>node1</host>
<port>9000</port>
</replica>
<replica>
<host>node2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>node3</host>
<port>9000</port>
</replica>
<replica>
<host>node4</host>
<port>9000</port>
</replica>
</shard>
</cluster_name>
</remote_servers>
```
### Using Cluster Discovery
With Cluster Discovery, rather than defining each node explicitly, you simply specify a path in ZooKeeper. All nodes that register under this path in ZooKeeper will be automatically discovered and added to the cluster.
```xml
<remote_servers>
<cluster_name>
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
</discovery>
</cluster_name>
</remote_servers>
```
If you want to specify a shard number for a particular node, you can include the `<shard>` tag within the `<discovery>` section:
for `node1` and `node2`:
```xml
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
<shard>1</shard>
</discovery>
```
for `node3` and `node4`:
```xml
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
<shard>2</shard>
</discovery>
```
### Observer mode
Nodes configured in observer mode will not register themselves as replicas.
They will solely observe and discover other active replicas in the cluster without actively participating.
To enable observer mode, include the `<observer/>` tag within the `<discovery>` section:
```xml
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
<observer/>
</discovery>
```
## Use-Cases and Limitations
As nodes are added or removed from the specified ZooKeeper path, they are automatically discovered or removed from the cluster without the need for configuration changes or server restarts.
However, changes affect only cluster configuration, not the data or existing databases and tables.
Consider the following example with a cluster of 3 nodes:
```xml
<remote_servers>
<default>
<discovery>
<path>/clickhouse/discovery/default_cluster</path>
</discovery>
</default>
</remote_servers>
```
```
SELECT * EXCEPT (default_database, errors_count, slowdowns_count, estimated_recovery_time, database_shard_name, database_replica_name)
FROM system.clusters WHERE cluster = 'default';
┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
│ default │ 1 │ 1 │ 1 │ 92d3c04025e8 │ 172.26.0.5 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 2 │ a6a68731c21b │ 172.26.0.4 │ 9000 │ 1 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 3 │ 8e62b9cb17a1 │ 172.26.0.2 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
```
```sql
CREATE TABLE event_table ON CLUSTER default (event_time DateTime, value String)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/event_table', '{replica}')
ORDER BY event_time PARTITION BY toYYYYMM(event_time);
INSERT INTO event_table ...
```
Then we add a new node to the cluster by starting a new node with the same entry in the `remote_servers` section of its configuration file:
```
┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
│ default │ 1 │ 1 │ 1 │ 92d3c04025e8 │ 172.26.0.5 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 2 │ a6a68731c21b │ 172.26.0.4 │ 9000 │ 1 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 3 │ 8e62b9cb17a1 │ 172.26.0.2 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 4 │ b0df3669b81f │ 172.26.0.6 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
```
The fourth node is participating in the cluster, but table `event_table` still exists only on the first three nodes:
```sql
SELECT hostname(), database, table FROM clusterAllReplicas(default, system.tables) WHERE table = 'event_table' FORMAT PrettyCompactMonoBlock
┌─hostname()───┬─database─┬─table───────┐
│ a6a68731c21b │ default │ event_table │
│ 92d3c04025e8 │ default │ event_table │
│ 8e62b9cb17a1 │ default │ event_table │
└──────────────┴──────────┴─────────────┘
```
If you need to have tables replicated on all the nodes, you may use the [Replicated](../../engines/database-engines/replicated.md) database engine as an alternative to cluster discovery.

View File

@ -88,7 +88,7 @@ Default: 2
## background_merges_mutations_scheduling_policy
The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`.
The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`.
## background_merges_mutations_scheduling_policy
@ -583,7 +583,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc
Type: String
Default:
Default:
## thread_pool_queue_size
@ -640,7 +640,7 @@ When `/disk1` is full, temporary data will be stored on `/disk2`.
```
Type: String
Default:
Default:
## uncompressed_cache_policy
@ -1948,7 +1948,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_thread_log>
```
@ -2236,6 +2236,8 @@ For the value of the `incl` attribute, see the section “[Configuration files](
**See Also**
- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
- [Cluster Discovery](../../operations/cluster-discovery.md)
- [Replicated database engine](../../engines/database-engines/replicated.md)
## timezone {#server_configuration_parameters-timezone}
@ -2404,7 +2406,7 @@ This section contains the following parameters:
* nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the servers hostname.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
**Example configuration**
``` xml

View File

@ -71,7 +71,7 @@ Possible values:
- Any positive integer.
Default value: 150.
Default value: 1000.
ClickHouse artificially executes `INSERT` longer (adds sleep) so that the background merge process can merge parts faster than they are added.

View File

@ -4644,6 +4644,14 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
└─────────────────────┴──────────────────────────┘
```
## partial_result_update_duration_ms
Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.
## max_rows_in_partial_result
Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query).
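A hedged sketch combining the two settings above; the query and values are illustrative, and `max_threads = 1` is added because partial results are described as supported only for single-threaded execution:

```sql
-- Stream an intermediate view of the result every 100 ms,
-- showing at most 10 rows, while the long ORDER BY is still running.
SELECT number
FROM numbers(100000000)
ORDER BY number DESC
SETTINGS
    max_threads = 1,
    partial_result_update_duration_ms = 100,
    max_rows_in_partial_result = 10;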
## validate_tcp_client_information {#validate-tcp-client-information}
Determines whether validation of client information is enabled when a query packet is received from a client using a TCP connection.
@ -4659,3 +4667,44 @@ The default value is `false`.
``` xml
<validate_tcp_client_information>true</validate_tcp_client_information>
```
## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
Allows ignoring 'permission denied' errors when using multi-directory `{}` globs for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md) storages.
This setting is only applicable to multi-directory `{}` globs.
Possible values: `0`, `1`.
Default value: `0`.
### Example
Having the following structure in `user_files`:
```
my_directory/
├── data1
│ ├── f1.csv
├── data2
│ ├── f2.csv
└── test_root
```
where the `data1` and `data2` directories are accessible, but there are no rights to read the `test_root` directory.
For a query like `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` an exception will be thrown:
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
It happens because a multi-directory glob requires a recursive search in _all_ available directories under `my_directory`.
If this setting is on, all inaccessible directories will be silently skipped, even if they are explicitly specified inside `{}`.
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
```
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
┌─_path───────────────────┬─_file───────┐
│ <full path to file>    │ <file name> │
└─────────────────────────┴─────────────┘
```

View File

@ -3,12 +3,13 @@ slug: /en/operations/system-tables/information_schema
---
# INFORMATION_SCHEMA
`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
`INFORMATION_SCHEMA` (or: `information_schema`) is a system database which provides a (somewhat) standardized, [DBMS-agnostic view](https://en.wikipedia.org/wiki/Information_schema) on metadata of database objects. The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolve in a backwards-compatible way, i.e. only new functionality is added but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md).
``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;
-- or:
SHOW TABLES FROM information_schema;
```
``` text
@ -17,6 +18,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
│ SCHEMATA │
│ TABLES │
│ VIEWS │
│ columns │
│ schemata │
│ tables │
│ views │
└──────────┘
```
@ -27,6 +32,8 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
- [TABLES](#tables)
- [VIEWS](#views)
Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns`, are provided for compatibility with other databases.
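A minimal sketch of the two equivalent access paths; the column names assume the standard `information_schema` layout:

```sql
-- Both spellings resolve to the same view; only the identifier case differs.
SELECT table_schema, table_name, column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name = 'query_log'
LIMIT 3;

SELECT count() FROM information_schema.columns WHERE table_schema = 'system';
```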
## COLUMNS {#columns}
Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be present according to the standard.

View File

@ -101,7 +101,8 @@ Columns:
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query (see the query sketch after this list).
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
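A hedged query sketch illustrating the relationship between `thread_ids` and the new `peak_threads_usage` column; filters and limits are illustrative:

```sql
-- Compare the number of distinct threads that participated in a query
-- with the peak number of threads running at the same time.
SELECT
    query_id,
    length(thread_ids) AS total_threads,
    peak_threads_usage
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```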

View File

@ -0,0 +1,64 @@
---
slug: /en/operations/system-tables/scheduler
---
# scheduler
Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server.
This table can be used for monitoring. The table contains a row for every scheduling node.
Example:
``` sql
SELECT *
FROM system.scheduler
WHERE resource = 'network_read' AND path = '/prio/fair/prod'
FORMAT Vertical
```
``` text
Row 1:
──────
resource: network_read
path: /prio/fair/prod
type: fifo
weight: 5
priority: 0
is_active: 0
active_children: 0
dequeued_requests: 67
dequeued_cost: 4692272
busy_periods: 63
vruntime: 938454.1999999989
system_vruntime: ᴺᵁᴸᴸ
queue_length: 0
queue_cost: 0
budget: -60524
is_satisfied: ᴺᵁᴸᴸ
inflight_requests: ᴺᵁᴸᴸ
inflight_cost: ᴺᵁᴸᴸ
max_requests: ᴺᵁᴸᴸ
max_cost: ᴺᵁᴸᴸ
```
Columns:
- `resource` (`String`) - Resource name
- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy
- `type` (`String`) - Type of a scheduling node.
- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair` type.
- `priority` (`Int64`) - Priority of a node, used by a parent node of `priority` type (lower value means higher priority).
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue.
- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue.
- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can be non-zero in case of a discrepancy between the estimated and real costs of resource requests (e.g. after a read/write failure)
- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraints of this node are satisfied.
- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state.
- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
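For illustration, a hedged monitoring query over the columns listed above:

```sql
-- Current queue sizes and remaining budget for every leaf (fifo) node
-- across all resources on the local server.
SELECT resource, path, queue_length, queue_cost, budget
FROM system.scheduler
WHERE type = 'fifo'
ORDER BY resource, path;
```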

View File

@ -16,6 +16,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--log-level=LEVEL` — Set log level. Default value: `information`.
- `--no-confirmation` — If set, will not require a confirmation on several commands. Default value: `false` for interactive and `true` for query mode.
- `--help` — Shows the help message.
## Example {#clickhouse-keeper-client-example}
@ -43,12 +45,13 @@ keeper foo bar
## Commands {#clickhouse-keeper-client-commands}
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `cd [path]` -- Changes the working path (default `.`)
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
- `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
- `create <path> <value> [mode]` -- Creates new node with the set value
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
- `get <path>` -- Returns the node's value
- `remove <path>` -- Remove the node
- `rm <path> [version]` -- Removes the node only if version matches (default: -1)
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
@ -56,3 +59,5 @@ keeper foo bar
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
- `sync <path>` -- Synchronizes node between processes and leader
- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration

View File

@ -0,0 +1,153 @@
---
slug: /en/operations/workload-scheduling
sidebar_position: 69
sidebar_label: "Workload scheduling"
title: "Workload scheduling"
---
When ClickHouse executes multiple queries simultaneously, they may use shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource, a scheduling hierarchy can be configured. The hierarchy root represents a resource, while the leaves are queues holding requests that exceed resource capacity.
:::note
Currently only remote disk IO can be scheduled using the described method. For CPU scheduling see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits see [Memory overcommit](settings/memory-overcommit.md).
:::
## Disk configuration {#disk-config}
To enable IO scheduling for a specific disk, you have to specify `read_resource` and/or `write_resource` in the storage configuration. It tells ClickHouse which resource should be used for every read and write request on the given disk. Read and write resources can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks can also refer to the same resource, which is useful for remote disks: for example, if you want to allow fair division of network bandwidth between "production" and "development" workloads.
Example:
```xml
<clickhouse>
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
<read_resource>network_read</read_resource>
<write_resource>network_write</write_resource>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
</policies>
</storage_configuration>
</clickhouse>
```
## Workload markup {#workload_markup}
Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, the value "default" is used. Note that you can specify another value using settings profiles. Settings constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting (a sketch of this is given after the example below).
Let's consider an example of a system with two different workloads: "production" and "development".
```sql
SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production'
SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development'
```
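To illustrate the last point above, here is a minimal sketch of pinning the `workload` value through a settings profile with a constraint. It assumes SQL-driven access control is enabled; the profile and user names are hypothetical.

```sql
-- Hypothetical names; the CONST constraint makes the setting unchangeable for the user.
CREATE SETTINGS PROFILE production_profile SETTINGS workload = 'production' CONST;
CREATE USER production_user IDENTIFIED WITH sha256_password BY 'my_password'
    SETTINGS PROFILE 'production_profile';
```

With such a profile, every query issued by `production_user` is scheduled as the "production" workload without passing the setting explicitly.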
## Resource scheduling hierarchy {#hierarchy}
From the standpoint of the scheduling subsystem, a resource represents a hierarchy of scheduling nodes.
```mermaid
graph TD
subgraph network_read
nr_root(("/"))
-->|100 concurrent requests| nr_fair("fair")
-->|75% bandwidth| nr_prod["prod"]
nr_fair
-->|25% bandwidth| nr_dev["dev"]
end
subgraph network_write
nw_root(("/"))
-->|100 concurrent requests| nw_fair("fair")
-->|75% bandwidth| nw_prod["prod"]
nw_fair
-->|25% bandwidth| nw_dev["dev"]
end
```
**Possible node types:**
* `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
The following example shows how to define IO scheduling hierarchies shown in the picture:
```xml
<clickhouse>
<resources>
<network_read>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_read>
<network_write>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_write>
</resources>
</clickhouse>
```
## Workload classifiers {#workload_classifiers}
Workload classifiers are used to define the mapping from the `workload` specified by a query into the leaf queues that should be used for specific resources. At the moment, workload classification is simple: only a static mapping is available.
Example:
```xml
<clickhouse>
<workload_classifiers>
<production>
<network_read>/fair/prod</network_read>
<network_write>/fair/prod</network_write>
</production>
<development>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</development>
<default>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</default>
</workload_classifiers>
</clickhouse>
```
## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)

View File

@ -12,7 +12,7 @@ Values can be added to the array in any (indeterminate) order.
The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
**Example**

View File

@ -10,7 +10,7 @@ Syntax: `groupArrayLast(max_size)(x)`
Creates an array of last argument values.
For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
**Example**

View File

@ -725,6 +725,42 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┴───────────────┘
```
## toDaysSinceYearZero
For a given date, returns the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function.
**Syntax**
``` sql
toDaysSinceYearZero(date)
```
Aliases: `TO_DAYS`
**Arguments**
- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md).
**Returned value**
The number of days passed since date 0000-01-01.
Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Example**
``` sql
SELECT toDaysSinceYearZero(toDate('2023-09-08'));
```
Result:
``` text
┌─toDaysSinceYearZero(toDate('2023-09-08'))─┐
│                                    713569 │
└───────────────────────────────────────────┘
```
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
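As a quick, hedged illustration of the call shape (the unit is passed as a string literal):

```sql
SELECT age('year', toDate('2020-01-01'), toDate('2023-09-08')) AS years_passed;
-- expected to return 3: three full years have elapsed between the two dates
```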
@ -947,6 +983,8 @@ Result:
Adds the time interval or date interval to the provided date or date with time.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -970,13 +1008,13 @@ Aliases: `dateAdd`, `DATE_ADD`.
- `year`
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by adding `value`, expressed in `unit`, to `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -992,10 +1030,16 @@ Result:
└───────────────────────────────────────────────┘
```
**See Also**
- [addDate](#addDate)
## date\_sub
Subtracts the time interval or date interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1020,13 +1064,13 @@ Aliases: `dateSub`, `DATE_SUB`.
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1042,10 +1086,15 @@ Result:
└────────────────────────────────────────────────┘
```
**See Also**
- [subDate](#subDate)
## timestamp\_add
Adds the specified time value with the provided date or date time value.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1056,7 +1105,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
**Arguments**
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
Possible values:
@ -1074,7 +1123,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
Date or date with time with the specified `value` expressed in `unit` added to `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1094,6 +1143,8 @@ Result:
Subtracts the time interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1117,13 +1168,13 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1139,6 +1190,90 @@ Result:
└──────────────────────────────────────────────────────────────┘
```
## addDate
Adds the time interval or date interval to the provided date or date with time.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
addDate(date, interval)
```
**Arguments**
- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
**Returned value**
Date or date with time obtained by adding `interval` to `date`.
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
```sql
SELECT addDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
```text
┌─addDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2021-01-01 │
└──────────────────────────────────────────────────┘
```
Alias: `ADDDATE`
**See Also**
- [date_add](#date_add)
## subDate
Subtracts the time interval or date interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
subDate(date, interval)
```
**Arguments**
- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
**Returned value**
Date or date with time obtained by subtracting `interval` from `date`.
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
```sql
SELECT subDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
```text
┌─subDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2015-01-01 │
└──────────────────────────────────────────────────┘
```
Alias: `SUBDATE`
**See Also**
- [date_sub](#date_sub)
## now
Returns the current date and time at the moment of query analysis. The function is a constant expression.
@ -1290,6 +1425,8 @@ Rounds the time to the half hour.
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
This function is the opposite of function `YYYYMMDDToDate()`.
**Example**
``` sql
@ -1312,8 +1449,7 @@ Converts a date or date with time to a UInt32 number containing the year and mon
**Example**
```sql
SELECT
toYYYYMMDD(now(), 'US/Eastern')
SELECT toYYYYMMDD(now(), 'US/Eastern')
```
Result:
@ -1331,8 +1467,7 @@ Converts a date or date with time to a UInt64 number containing the year and mon
**Example**
```sql
SELECT
toYYYYMMDDhhmmss(now(), 'US/Eastern')
SELECT toYYYYMMDDhhmmss(now(), 'US/Eastern')
```
Result:
@ -1343,6 +1478,93 @@ Result:
└───────────────────────────────────────┘
```
## YYYYMMDDToDate
Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md).
This function is the opposite of function `toYYYYMMDD()`.
The output is undefined if the input does not encode a valid Date value.
**Syntax**
```sql
YYYYMMDDToDate(yyyymmdd);
```
**Arguments**
- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**
- a date created from the arguments.
Type: [Date](../../sql-reference/data-types/date.md).
**Example**
```sql
SELECT YYYYMMDDToDate(20230911);
```
Result:
```response
┌─YYYYMMDDToDate(20230911)─┐
│               2023-09-11 │
└──────────────────────────┘
```
## YYYYMMDDToDate32
Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md).
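A short sketch of the expected behaviour; since the return type is [Date32](../../sql-reference/data-types/date32.md), dates before 1970 can be represented:

```sql
SELECT YYYYMMDDToDate32(19000102) AS d, toTypeName(d) AS type;
-- expected: d = 1900-01-02, type = Date32
```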
## YYYYMMDDhhmmssToDateTime
Converts a number containing the year, month, day, hour, minute and second to a [DateTime](../../sql-reference/data-types/datetime.md).
The output is undefined if the input does not encode a valid DateTime value.
This function is the opposite of function `toYYYYMMDDhhmmss()`.
**Syntax**
```sql
YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]);
```
**Arguments**
- `yyyymmddhhmmss` - A number representing the year, month, day, hour, minute and second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).
**Returned value**
- a date with time created from the arguments.
Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
```sql
SELECT YYYYMMDDhhmmssToDateTime(20230911131415);
```
Result:
```response
┌──────YYYYMMDDhhmmssToDateTime(20230911131415)─┐
│ 2023-09-11 13:14:15 │
└───────────────────────────────────────────────┘
```
## YYYYMMDDhhmmssToDateTime64
Like function `YYYYMMDDhhmmssToDateTime()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).
Accepts an additional, optional `precision` parameter after the `timezone` parameter.
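A minimal sketch of a call; the default precision is assumed here (typically 3 decimal digits for the fractional seconds):

```sql
SELECT YYYYMMDDhhmmssToDateTime64(20230911131415);
-- expected to return 2023-09-11 13:14:15.000 of type DateTime64
```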
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
Function adds a Date/DateTime interval to a Date/DateTime and then returns the Date/DateTime. For example:
@ -1635,7 +1857,7 @@ monthName(date)
**Arguments**
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**

View File

@ -90,7 +90,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
```bash
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql

View File

@ -208,7 +208,7 @@ The optional keyword `FULL` causes the output to include the collation, comment
The statement produces a result table with the following structure:
- field - The name of the column (String)
- type - The column data type (String)
- null - If the column data type is Nullable (UInt8)
- null - `YES` if the column data type is Nullable, `NO` otherwise (String)
- key - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String)
- default - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String))
- extra - Additional information, currently unused (String)
@ -638,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables
**See Also**
- [system.table_engines](../../operations/system-tables/table_engines.md) table
## SHOW FUNCTIONS
``` sql
SHOW FUNCTIONS [LIKE | ILIKE '<pattern>']
```
Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table.
If either the `LIKE` or `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided `<pattern>`.
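For example, a sketch of listing all functions whose names start with `to`:

```sql
SHOW FUNCTIONS LIKE 'to%';
```

This is roughly equivalent to selecting from `system.functions` with a `name LIKE 'to%'` condition.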
**See Also**
- [system.functions](../../operations/system-tables/functions.md) table

View File

@ -135,13 +135,13 @@ Getting data from table in table.csv, located in archive1.zip or/and archive2.zi
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
```
## Globs in Path
## Globs in Path {#globs_in_path}
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only the suffix or prefix). A combined example is sketched after this list.
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. If at least one of the strings contains `/`, `'permission denied'` errors may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting for File & HDFS.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
- `**` - Fetches all files inside the folder recursively.
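As referenced above, a hedged sketch of combining several glob types in one path (the directory layout and schema are hypothetical):

```sql
-- Count the rows of every CSV file directly inside some_dir/data1 and some_dir/data2.
SELECT count(*)
FROM file('some_dir/{data1,data2}/*.csv', 'CSV', 'name String, value UInt32');
```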
@ -210,7 +210,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.
**See Also**

View File

@ -39,13 +39,13 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Globs in path {#globs_in_path}
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only the suffix or prefix).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. If at least one of the strings contains `/`, `'permission denied'` errors may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md).
@ -102,6 +102,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.
**See Also**

View File

@ -805,8 +805,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
<single_read_retries>4</single_read_retries>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>
@ -832,8 +830,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
- `single_read_retries` — число попыток выполнения запроса в случае возникновения ошибки в процессе чтения. Значение по умолчанию: `4`.
- `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.
- `metadata_path` — путь к локальному файловому хранилищу для хранения файлов с метаданными для S3. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` — признак, разрешено ли хранение кэша засечек и файлов индекса в локальной файловой системе. Значение по умолчанию: `true`.
- `cache_path` — путь в локальной файловой системе, где будут храниться кэш засечек и файлы индекса. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — признак, выполнять ли проверку доступов при запуске диска. Если установлено значение `true`, то проверка не выполняется. Значение по умолчанию: `false`.
Диск S3 может быть сконфигурирован как `main` или `cold`:

View File

@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -267,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Пример последовательности заголовков:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
Возможные поля заголовка:
@ -530,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -570,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -622,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -641,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -4209,3 +4209,45 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
│ 1.7091 │ 15008753 │
└─────────────────────┴──────────────────────────┘
```
## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
Позволяет игнорировать ошибку 'permission denied', возникающую при использовании шаблона `{}`, содержащего `/` внутри себя.
Работает для [File](../../sql-reference/table-functions/file.md#globs_in_path) и [HDFS](../../sql-reference/table-functions/hdfs.md).
Работает _только_ для указанных выше шаблонов `{}`.
Возможные значения: `0`, `1`.
Значение по умолчанию: `0`.
### Пример
Пусть в `user_files` имеется следующая структура:
```
my_directory/
├── data1
│ ├── f1.csv
├── data2
│ ├── f2.csv
└── test_root
```
Пусть также директории `data1`, `data2` могут быть прочитаны, но прав на чтение `test_root` нет.
На запрос `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` будет выброшено исключение:
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
Это происходит, так как для обработки такого шаблона необходимо выполнить рекурсивный поиск по _всем_ директориям, находящимся внутри `my_directory`.
Если данная настройка имеет значение 1, то недоступные директории будут тихо пропущены, даже если они явно указаны внутри `{}`.
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
```
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
┌─_path───────────────────┬─_file───────┐
│ <full path to file>     │ <file name> │
└─────────────────────────┴─────────────┘
```

View File

@ -99,7 +99,8 @@ ClickHouse не удаляет данные из таблица автомати
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к записи в логе. Представляет собой произвольную строку, длина которой должна быть не больше, чем [max_query_size](../../operations/settings/settings.md#settings-max_query_size). Если нет комментария, то пустая строка.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов; эти потоки не обязательно выполняются одновременно.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальное количество одновременно работавших потоков, участвовавших в обработке запроса.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `агрегатных функций`, использованных при выполнении запроса.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `комбинаторов агрегатных функций`, использованных при выполнении запроса.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `движков баз данных`, использованных при выполнении запроса.

View File

@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
@ -124,6 +124,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
- `_path` — путь к файлу.
- `_file` — имя файла.
**Смотрите также**
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)

View File

@ -39,11 +39,11 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Шаблоны в пути**
## Шаблоны поиска в компонентах пути {#globs-in-path}
- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — Заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
- `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
@ -62,3 +62,5 @@ LIMIT 2
**Смотрите также**
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)
- Параметр [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs)

View File

@ -745,8 +745,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
<single_read_retries>4</single_read_retries>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>
@ -772,8 +770,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
- `single_read_retries` - 读过程中连接丢失后重试次数默认值为4。
- `min_bytes_for_seek` - 使用查找操作而不是顺序读操作的最小字节数默认值为1000。
- `metadata_path` - 本地存放S3元数据文件的路径默认值为`/var/lib/clickhouse/disks/<disk_name>/`
- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。
- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks/<disk_name>/cache/`。
- `skip_access_check` - 如果为`true`Clickhouse启动时不检查磁盘是否可用。默认为`false`。
- `server_side_encryption_customer_key_base64` - 如果指定该项的值请求时会加上为了访问SSE-C加密数据而必须的头信息。
@ -823,4 +819,3 @@ S3磁盘也可以设置冷热存储
- `_part_uuid` - 唯一部分标识符(如果 MergeTree 设置`assign_part_uuids` 已启用)。
- `_partition_value``partition by` 表达式的值(元组)。
- `_sample_factor` - 采样因子(来自请求)。

View File

@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
显示字段信息:
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -5,8 +5,8 @@ sidebar_position: 31
# stddevSamp {#stddevsamp}
结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。
结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) 的平方根。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::
:::

View File

@ -32,10 +32,10 @@ contents:
dst: /usr/bin/clickhouse-keeper
- src: clickhouse-keeper.service
dst: /lib/systemd/system/clickhouse-keeper.service
- src: clickhouse
- src: clickhouse-keeper
dst: /usr/bin/clickhouse-keeper-client
type: symlink
- src: clickhouse
- src: clickhouse-keeper
dst: /usr/bin/clickhouse-keeper-converter
type: symlink
# docs

View File

@ -1209,8 +1209,6 @@
<single_read_retries>4</single_read_retries>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>

View File

@ -18,7 +18,14 @@
#include <Common/Exception.h>
#include <Common/parseGlobs.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
static void setupLogging(const std::string & log_level)
{

View File

@ -9,8 +9,6 @@
#include <thread>
#include <filesystem>
#include <re2/re2.h>
#include <boost/program_options.hpp>
#include <Common/TerminalSize.h>
@ -26,6 +24,14 @@
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
static constexpr auto documentation = R"(
A tool to extract information from Git repository for analytics.

View File

@ -9,11 +9,11 @@ namespace DB
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -42,11 +42,11 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -64,11 +64,12 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -93,11 +94,12 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -143,10 +145,10 @@ void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -156,13 +158,28 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool ExistsCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void ExistsCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->exists(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -325,25 +342,33 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
ASTPtr version;
if (ParserNumber{}.parse(pos, version, expected))
node->args.push_back(version->as<ASTLiteral &>().value);
return true;
}
void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()));
Int32 version{-1};
if (query->args.size() == 2)
version = static_cast<Int32>(query->args[1].get<Int32>());
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
}
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -351,8 +376,72 @@ bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & nod
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
String path = client->getAbsolutePath(query->args[0].safeGet<String>());
client->askConfirmation("You are going to recursively delete path " + path,
[client, path]{ client->zookeeper->removeRecursive(path); });
client->askConfirmation(
"You are going to recursively delete path " + path, [client, path] { client->zookeeper->removeRecursive(path); });
}
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
ReconfigCommand::Operation operation;
if (ParserKeyword{"ADD"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::ADD;
else if (ParserKeyword{"REMOVE"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::REMOVE;
else if (ParserKeyword{"SET"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::SET;
else
return false;
node->args.push_back(operation);
ParserToken{TokenType::Whitespace}.ignore(pos);
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
return true;
}
void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
String joining;
String leaving;
String new_members;
auto operation = query->args[0].get<ReconfigCommand::Operation>();
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
joining = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
leaving = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
new_members = query->args[1].safeGet<DB::String>();
break;
default:
UNREACHABLE();
}
auto response = client->zookeeper->reconfig(joining, leaving, new_members);
std::cout << response.value << '\n';
}
bool SyncCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void SyncCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->sync(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const

View File

@ -51,7 +51,7 @@ class CDCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
String getHelpMessage() const override { return "{} [path] -- Changes the working path (default `.`)"; }
};
class SetCommand : public IKeeperClientCommand
@ -64,7 +64,7 @@ class SetCommand : public IKeeperClientCommand
String getHelpMessage() const override
{
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
return "{} <path> <value> [version] -- Updates the node's value. Only updates if version matches (default: -1)";
}
};
@ -101,6 +101,17 @@ class GetCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};
class ExistsCommand : public IKeeperClientCommand
{
String getName() const override { return "exists"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Returns `1` if node exists, `0` otherwise"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
@ -154,7 +165,6 @@ class FindBigFamily : public IKeeperClientCommand
}
};
class RMCommand : public IKeeperClientCommand
{
String getName() const override { return "rm"; }
@ -163,7 +173,7 @@ class RMCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
String getHelpMessage() const override { return "{} <path> [version] -- Removes the node only if version matches (default: -1)"; }
};
class RMRCommand : public IKeeperClientCommand
@ -177,6 +187,35 @@ class RMRCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};
class ReconfigCommand : public IKeeperClientCommand
{
enum class Operation : UInt8
{
ADD = 0,
REMOVE = 1,
SET = 2,
};
String getName() const override { return "reconfig"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <add|remove|set> \"<arg>\" [version] -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration"; }
};
class SyncCommand: public IKeeperClientCommand
{
String getName() const override { return "sync"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Synchronizes node between processes and leader"; }
};
class HelpCommand : public IKeeperClientCommand
{
String getName() const override { return "help"; }

View File

@ -84,8 +84,11 @@ std::vector<String> KeeperClient::getCompletions(const String & prefix) const
void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
if (!ask_confirmation)
return callback();
std::cout << prompt << " Continue?\n";
need_confirmation = true;
waiting_confirmation = true;
confirmation_callback = callback;
}
@ -170,6 +173,14 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
Poco::Util::Option("log-level", "", "set log level")
.argument("<level>")
.binding("log-level"));
options.addOption(
Poco::Util::Option("no-confirmation", "", "if set, will not require a confirmation on several commands. default false for interactive and true for query")
.binding("no-confirmation"));
options.addOption(
Poco::Util::Option("tests-mode", "", "run keeper-client in a special mode for tests. all commands output are separated by special symbols. default false")
.binding("tests-mode"));
}
void KeeperClient::initialize(Poco::Util::Application & /* self */)
@ -184,12 +195,15 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<CreateCommand>(),
std::make_shared<TouchCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<ExistsCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStaleBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<ReconfigCommand>(),
std::make_shared<SyncCommand>(),
std::make_shared<HelpCommand>(),
std::make_shared<FourLetterWordCommand>(),
});
@ -229,18 +243,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
EventNotifier::init();
}
void KeeperClient::executeQuery(const String & query)
{
std::vector<String> queries;
boost::algorithm::split(queries, query, boost::is_any_of(";"));
for (const auto & query_text : queries)
{
if (!query_text.empty())
processQueryText(query_text);
}
}
bool KeeperClient::processQueryText(const String & text)
{
if (exit_strings.find(text) != exit_strings.end())
@ -248,29 +250,44 @@ bool KeeperClient::processQueryText(const String & text)
try
{
if (need_confirmation)
if (waiting_confirmation)
{
need_confirmation = false;
waiting_confirmation = false;
if (text.size() == 1 && (text == "y" || text == "Y"))
confirmation_callback();
return true;
}
KeeperParser parser;
String message;
const char * begin = text.data();
ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
const char * end = begin + text.size();
if (!res)
while (begin < end)
{
std::cerr << message << "\n";
return true;
String message;
ASTPtr res = tryParseQuery(
parser,
begin,
end,
/* out_error_message = */ message,
/* hilite = */ true,
/* description = */ "",
/* allow_multi_statements = */ true,
/* max_query_size = */ 0,
/* max_parser_depth = */ 0,
/* skip_insignificant = */ false);
if (!res)
{
std::cerr << message << "\n";
return true;
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
catch (Coordination::Exception & err)
{
@ -279,7 +296,7 @@ bool KeeperClient::processQueryText(const String & text)
return true;
}
void KeeperClient::runInteractive()
void KeeperClient::runInteractiveReplxx()
{
LineReader::Patterns query_extenders = {"\\"};
@ -299,7 +316,7 @@ void KeeperClient::runInteractive()
while (true)
{
String prompt;
if (need_confirmation)
if (waiting_confirmation)
prompt = "[y/n] ";
else
prompt = cwd.string() + " :) ";
@ -313,6 +330,26 @@ void KeeperClient::runInteractive()
}
}
void KeeperClient::runInteractiveInputStream()
{
for (String input; std::getline(std::cin, input);)
{
if (!processQueryText(input))
break;
std::cout << "\a\a\a\a" << std::endl;
std::cerr << std::flush;
}
}
void KeeperClient::runInteractive()
{
if (config().hasOption("tests-mode"))
runInteractiveInputStream();
else
runInteractiveReplxx();
}
int KeeperClient::main(const std::vector<String> & /* args */)
{
if (config().hasOption("help"))
@ -362,8 +399,13 @@ int KeeperClient::main(const std::vector<String> & /* args */)
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
if (config().has("no-confirmation") || config().has("query"))
ask_confirmation = false;
if (config().has("query"))
executeQuery(config().getString("query"));
{
processQueryText(config().getString("query"));
}
else
runInteractive();

View File

@ -49,8 +49,10 @@ public:
protected:
void runInteractive();
void runInteractiveReplxx();
void runInteractiveInputStream();
bool processQueryText(const String & text);
void executeQuery(const String & query);
void loadCommands(std::vector<Command> && new_commands);
@ -61,7 +63,8 @@ protected:
zkutil::ZooKeeperArgs zk_args;
bool need_confirmation = false;
bool ask_confirmation = true;
bool waiting_confirmation = false;
std::vector<String> registered_commands_and_four_letter_words;
};

View File

@ -7,43 +7,32 @@ namespace DB
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
expected.add(pos, getTokenName(TokenType::BareWord));
if (pos->type == TokenType::BareWord)
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
{
result = String(pos->begin, pos->end);
++pos;
ParserToken{TokenType::Whitespace}.ignore(pos);
return true;
if (!parseIdentifierOrStringLiteral(pos, expected, result))
return false;
}
bool status = parseIdentifierOrStringLiteral(pos, expected, result);
ParserToken{TokenType::Whitespace}.ignore(pos);
return status;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
return parseIdentifierOrStringLiteral(pos, expected, path);
String result;
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream)
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
{
result.append(pos->begin, pos->end);
++pos;
}
ParserToken{TokenType::Whitespace}.ignore(pos);
if (result.empty())
return false;
path = result;
return true;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
return parseKeeperArg(pos, expected, path);
}
bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
auto query = std::make_shared<ASTKeeperQuery>();
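With the parser changes above, a bare path argument stops at whitespace, end of input, or a semicolon, while a quoted identifier or string literal may contain spaces and semicolons. A minimal standalone sketch of that tokenization rule, independent of the ClickHouse parser classes (the function name and the simplified string-based interface are illustrative only):

#include <cctype>
#include <iostream>
#include <string>

/// Illustrative only: reads one "path" argument starting at `pos`.
/// A quoted path may contain spaces and semicolons; a bare path ends
/// at the first whitespace character or semicolon.
std::string readKeeperPath(const std::string & input, size_t & pos)
{
    if (pos < input.size() && (input[pos] == '\'' || input[pos] == '"'))
    {
        const char quote = input[pos++];
        std::string result;
        while (pos < input.size() && input[pos] != quote)
            result += input[pos++];
        if (pos < input.size())
            ++pos; /// skip the closing quote
        return result;
    }

    std::string result;
    while (pos < input.size() && !std::isspace(static_cast<unsigned char>(input[pos])) && input[pos] != ';')
        result += input[pos++];
    return result;
}

int main()
{
    size_t pos = 0;
    std::cout << readKeeperPath("/a/b-c;ls /", pos) << '\n';            /// "/a/b-c" - stops at ';'

    pos = 0;
    std::cout << readKeeperPath("\"/path with spaces\"", pos) << '\n';  /// "/path with spaces"
}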

View File

@ -572,17 +572,14 @@ void LocalServer::processConfig()
if (!queries.empty() && config().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (config().has("multiquery"))
is_multiquery = true;
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (is_interactive && !delayed_interactive)
{
if (config().has("multiquery"))
is_multiquery = true;
}
else
if (!is_interactive || delayed_interactive)
{
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
is_multiquery = true;
}
print_stack_trace = config().getBool("stacktrace", false);
@ -881,6 +878,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());

View File

@ -1393,7 +1393,7 @@ try
const auto interserver_listen_hosts = getInterserverListenHosts(config());
const auto listen_try = getListenTry(config());
if (config().has("keeper_server"))
if (config().has("keeper_server.server_id"))
{
#if USE_NURAFT
//// If we don't have configured connection probably someone trying to use clickhouse-server instead

View File

@ -448,8 +448,6 @@
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
</disks>

View File

@ -10,9 +10,17 @@
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <boost/program_options.hpp>
#include <re2/re2.h>
#include <filesystem>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace fs = std::filesystem;
#define EXTRACT_PATH_PATTERN ".*\\/store/(.*)"

View File

@ -26,10 +26,17 @@
#include <IO/Operators.h>
#include <Poco/AccessExpireCache.h>
#include <boost/algorithm/string/join.hpp>
#include <re2/re2.h>
#include <filesystem>
#include <mutex>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{

View File

@ -1,11 +1,18 @@
#pragma once
#include <re2/re2.h>
#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{

View File

@ -1,12 +1,18 @@
#pragma once
#include <re2/re2.h>
#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ColumnTransformers.h>
#include <Parsers/ASTAsterisk.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{

View File

@ -6341,9 +6341,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat(), scope.context);
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(scope.context);
}
if (use_columns_from_insert_query)

View File

@ -1,195 +0,0 @@
#include "UniqToCountPass.h"
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
namespace DB
{
namespace
{
bool matchFnUniq(String func_name)
{
auto name = Poco::toLower(func_name);
return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined"
|| name == "uniqCombined64";
}
/// Extract the corresponding projection columns for group by node list.
/// For example:
/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode)
NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node)
{
if (!query_node->hasGroupBy())
return {};
NamesAndTypes result;
for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren())
{
const auto & projection_columns = query_node->getProjectionColumns();
const auto & projection_nodes = query_node->getProjection().getNodes();
assert(projection_columns.size() == projection_nodes.size());
for (size_t i = 0; i < projection_columns.size(); i++)
{
if (projection_nodes[i]->isEqual(*group_by_ele))
result.push_back(projection_columns[i]);
}
}
return result;
}
/// Whether query_columns equals subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
if (query_columns.size() != subquery_columns.size())
return false;
for (const auto & query_column : query_columns)
{
auto find = std::find_if(
subquery_columns.begin(),
subquery_columns.end(),
[&](const auto & subquery_column) -> bool
{
if (auto * column_node = query_column->as<ColumnNode>())
{
return subquery_column == column_node->getColumn();
}
return false;
});
if (find == subquery_columns.end())
return false;
}
return true;
}
/// Whether subquery_columns contains all columns in subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
if (query_columns.size() > subquery_columns.size())
return false;
for (const auto & query_column : query_columns)
{
auto find = std::find_if(
subquery_columns.begin(),
subquery_columns.end(),
[&](const auto & subquery_column) -> bool
{
if (auto * column_node = query_column->as<ColumnNode>())
{
return subquery_column == column_node->getColumn();
}
return false;
});
if (find == subquery_columns.end())
return false;
}
return true;
}
}
class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_uniq_to_count)
return;
auto * query_node = node->as<QueryNode>();
if (!query_node)
return;
/// Check that query has only single table expression which is subquery
auto * subquery_node = query_node->getJoinTree()->as<QueryNode>();
if (!subquery_node)
return;
/// Check that query has only single node in projection
auto & projection_nodes = query_node->getProjection().getNodes();
if (projection_nodes.size() != 1)
return;
/// Check that projection_node is a function
auto & projection_node = projection_nodes[0];
auto * function_node = projection_node->as<FunctionNode>();
if (!function_node)
return;
/// Check that query single projection node is `uniq` or its variants
if (!matchFnUniq(function_node->getFunctionName()))
return;
auto & uniq_arguments_nodes = function_node->getArguments().getNodes();
/// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)'
auto match_subquery_with_distinct = [&]() -> bool
{
if (!subquery_node->isDistinct())
return false;
/// uniq expression list == subquery projection columns
if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
return false;
return true;
};
/// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)'
auto match_subquery_with_group_by = [&]() -> bool
{
if (!subquery_node->hasGroupBy())
return false;
/// uniq argument node list == subquery group by node list
auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node);
if (!nodeListEquals(uniq_arguments_nodes, group_by_columns))
return false;
/// subquery projection columns must contain all columns in uniq argument node list
if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
return false;
return true;
};
/// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
}
}
};
void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
UniqToCountVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -1,30 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery.
* Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to
* Result: 'SELECT count() FROM (SELECT DISTINCT x ...)'
*
* Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to
* Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)'
*
* Note that we can rewrite all uniq variants except uniqUpTo.
*/
class UniqToCountPass final : public IQueryTreePass
{
public:
String getName() override { return "UniqToCount"; }
String getDescription() override
{
return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -18,7 +18,6 @@
#include <Analyzer/Utils.h>
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Passes/CountDistinctPass.h>
#include <Analyzer/Passes/UniqToCountPass.h>
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <Analyzer/Passes/SumIfToCountIfPass.h>
@ -248,7 +247,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<ConvertLogicalExpressionToCNFPass>());
manager.addPass(std::make_unique<CountDistinctPass>());
manager.addPass(std::make_unique<UniqToCountPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());

View File

@ -316,7 +316,6 @@ target_link_libraries(clickhouse_common_io
boost::context
ch_contrib::cityhash
ch_contrib::re2
ch_contrib::re2_st
ch_contrib::zlib
pcg_random
Poco::Foundation

View File

@ -441,7 +441,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (!block)
return;
processed_rows += block.rows();
if (block.rows() == 0 && partial_result_mode == PartialResultMode::Active)
{
partial_result_mode = PartialResultMode::Inactive;
if (is_interactive)
{
progress_indication.clearProgressOutput(*tty_buf);
std::cout << "Full result:" << std::endl;
progress_indication.writeProgress(*tty_buf);
}
}
if (partial_result_mode == PartialResultMode::Inactive)
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initOutputFormat(block, parsed_query);
@ -451,13 +464,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (!is_interactive && partial_result_mode == PartialResultMode::Active)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput(*tty_buf);
try
{
output_format->write(materializeBlock(block));
if (partial_result_mode == PartialResultMode::Active)
output_format->writePartialResult(materializeBlock(block));
else
output_format->write(materializeBlock(block));
written_first_block = true;
}
catch (const Exception &)
@ -521,6 +541,9 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
if (partial_result_mode == PartialResultMode::NotInit)
partial_result_mode = PartialResultMode::Active;
if (!output_format)
{
/// Ignore all results when fuzzing as they can be huge.
@ -931,6 +954,14 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
const auto & settings = global_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0;
if (has_partial_result_setting)
{
partial_result_mode = PartialResultMode::NotInit;
if (is_interactive)
std::cout << "Partial result:" << std::endl;
}
int retries_left = 10;
while (retries_left)
@ -1040,7 +1071,9 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
}
catch (const LocalFormatError &)
{
local_format_error = std::current_exception();
/// Remember the first exception.
if (!local_format_error)
local_format_error = std::current_exception();
connection->sendCancel();
}
}
@ -1736,6 +1769,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
}
processed_rows = 0;
partial_result_mode = PartialResultMode::Inactive;
written_first_block = false;
progress_indication.resetProgress();
profile_events.watch.restart();

View File

@ -272,6 +272,21 @@ protected:
size_t processed_rows = 0; /// How many rows have been read or written.
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at the end of the query execution.
enum class PartialResultMode: UInt8
{
/// Query doesn't show partial result before the first block with 0 rows.
/// The first block with 0 rows initializes the output table format using its header.
NotInit,
/// Query shows partial result after the first and before the second block with 0 rows.
/// The second block with 0 rows indicates that receiving blocks with the partial result has been completed and the next blocks will contain the full result.
Active,
/// Query doesn't show partial result at all.
Inactive,
};
PartialResultMode partial_result_mode = PartialResultMode::Inactive;
bool print_stack_trace = false;
/// The last exception that was received from the server. Is used for the
/// return code in batch mode.
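Taken together, the comments on PartialResultMode describe a small state machine driven by empty blocks: the first empty block moves NotInit to Active (partial results follow), and the next empty block moves Active to Inactive (the full result follows). A minimal standalone sketch of those transitions (the enum mirrors the one above; the driver function is illustrative only):

#include <cstddef>
#include <cstdint>
#include <iostream>

enum class PartialResultMode : uint8_t { NotInit, Active, Inactive };

/// Illustrative only: advance the mode for each received block, following the
/// comments above (blocks with 0 rows delimit the partial-result phase).
PartialResultMode onBlock(PartialResultMode mode, size_t rows)
{
    if (mode == PartialResultMode::NotInit && rows == 0)
        return PartialResultMode::Active;     /// header block: partial results follow
    if (mode == PartialResultMode::Active && rows == 0)
        return PartialResultMode::Inactive;   /// partial phase done: full result follows
    return mode;
}

int main()
{
    PartialResultMode mode = PartialResultMode::NotInit;
    for (size_t rows : {0u, 10u, 25u, 0u, 100u})   /// header, two partial blocks, delimiter, full result
    {
        mode = onBlock(mode, rows);
        std::cout << "rows=" << rows << " mode=" << static_cast<int>(mode) << '\n';
    }
}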

View File

@ -73,7 +73,7 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu
}
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_)
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, std::optional<size_t> version_)
{
func = func_;
version = version_;

View File

@ -103,7 +103,7 @@ private:
public:
~ColumnAggregateFunction() override;
void set(const AggregateFunctionPtr & func_, size_t version_);
void set(const AggregateFunctionPtr & func_, std::optional<size_t> version_ = std::nullopt);
AggregateFunctionPtr getAggregateFunction() { return func; }
AggregateFunctionPtr getAggregateFunction() const { return func; }

View File

@ -80,7 +80,7 @@ StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, cha
res.data = pos;
}
memcpy(pos, &data[n], sizeof(T));
return StringRef(pos, sizeof(T));
return res;
}
template <is_decimal T>

View File

@ -670,9 +670,8 @@ UInt128 ColumnUnique<ColumnType>::IncrementalHash::getHash(const ColumnType & co
for (size_t i = 0; i < column_size; ++i)
column.updateHashWithValue(i, sip_hash);
hash = sip_hash.get128();
std::lock_guard lock(mutex);
hash = sip_hash.get128();
cur_hash = hash;
num_added_rows.store(column_size);
}

View File

@ -433,7 +433,7 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
template <typename T>
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
{
auto res = this->create();
auto res = this->create(size);
if (size > 0)
{

View File

@ -397,6 +397,13 @@ public:
/// It affects performance only (not correctness).
virtual void reserve(size_t /*n*/) {}
/// Requests the removal of unused capacity.
/// It is a non-binding request to reduce the capacity of the underlying container to its size.
virtual MutablePtr shrinkToFit() const
{
return cloneResized(size());
}
/// If we have another column as a source (owner of data), copy all data to ourself and reset source.
virtual void ensureOwnership() {}

View File

@ -1,24 +1,8 @@
#include "Allocator.h"
/** Keep definition of this constant in cpp file; otherwise its value
* is inlined into allocator code making it impossible to override it
* in third-party code.
*
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
*/
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*
* Not too small to avoid too quick exhaust of memory mappings.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif
/// The constant is chosen almost arbitrarily: what I observed is that 128KB is too small, 1MB is almost indistinguishable from 64MB, and 1GB is too large.
extern const size_t POPULATE_THRESHOLD = 16 * 1024 * 1024;
template class Allocator<false, false>;
template class Allocator<true, false>;

View File

@ -20,12 +20,6 @@
#include <sys/mman.h>
#include <Core/Defines.h>
#if defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
/// Thread and memory sanitizers do not intercept mremap. The usage of
/// mremap will lead to false positives.
#define DISABLE_MREMAP 1
#endif
#include <base/mremap.h>
#include <base/getPageSize.h>
#include <Common/CurrentMemoryTracker.h>
@ -35,52 +29,33 @@
#include <Common/Allocator_fwd.h>
#include <base/errnoToString.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/**
* Many modern allocators (for example, tcmalloc) do not do a mremap for
* realloc, even in case of large enough chunks of memory. Although this allows
* you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address
* space is very slow, especially in the case of a large number of threads. We
* expect that the set of operations mmap/something to do/mremap can only be
* performed about 1000 times per second.
*
* P.S. This is also required, because tcmalloc can not allocate a chunk of
* memory greater than 16 GB.
*
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
* to override it during linkage when using ClickHouse as a library in
* third-party applications which may already use own allocator doing mmaps
* in the implementation of alloc/realloc.
*/
extern const size_t MMAP_THRESHOLD;
extern const size_t POPULATE_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR;
}
}
/** Previously there was code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage of the mremap system call for large reallocs.
* Actually jemalloc had support for mremap, but it was intentionally removed from the codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
* Our performance tests also show that without manual mmap/mremap/munmap ClickHouse is overall faster by about 1-2% and up to 5-7x for some types of queries.
* That is why we don't do manual mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
*/
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
@ -88,10 +63,8 @@ namespace ErrorCodes
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_, bool mmap_populate>
template <bool clear_memory_, bool populate>
class Allocator
{
public:
@ -111,7 +84,7 @@ public:
try
{
checkSize(size);
freeNoTrack(buf, size);
freeNoTrack(buf);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
}
@ -135,8 +108,7 @@ public:
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
&& alignment <= MALLOC_MIN_ALIGNMENT)
else if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
auto trace_free = CurrentMemoryTracker::free(old_size);
@ -145,7 +117,10 @@ public:
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
{
DB::throwFromErrno(
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
buf = new_buf;
trace_alloc.onAlloc(buf, new_size);
@ -154,46 +129,18 @@ public:
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
auto trace_free = CurrentMemoryTracker::free(old_size);
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
trace_free.onFree(buf, old_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
trace_alloc.onAlloc(buf, new_size);
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
auto trace_free = CurrentMemoryTracker::free(old_size);
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
trace_free.onFree(buf, old_size);
void * new_buf = allocNoTrack(new_size, alignment);
trace_alloc.onAlloc(buf, new_size);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
if constexpr (populate)
prefaultPages(buf, new_size);
return buf;
}
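With the mmap branches gone, realloc() above reduces to three cases: nothing to do when the size is unchanged, plain ::realloc for the default alignment (zero-filling the tail when clear_memory is set), and an allocate-copy-free fallback for larger alignments. A standalone sketch of that simplified scheme, without the memory tracking and exception machinery of the real class (function and constant names are illustrative):

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>

static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

/// Illustrative only: the shape of the mmap-free realloc path shown above.
void * simpleRealloc(void * buf, size_t old_size, size_t new_size, size_t alignment, bool clear_memory)
{
    if (old_size == new_size)
        return buf;                                     /// nothing to do

    if (alignment <= MALLOC_MIN_ALIGNMENT)
    {
        void * new_buf = ::realloc(buf, new_size);      /// let the allocator (e.g. jemalloc) handle any size
        if (!new_buf)
            std::abort();
        if (clear_memory && new_size > old_size)
            std::memset(static_cast<char *>(new_buf) + old_size, 0, new_size - old_size);
        return new_buf;
    }

    /// Larger alignment: allocate a new aligned block, copy, free the old one.
    void * new_buf = nullptr;
    if (posix_memalign(&new_buf, alignment, new_size) != 0)
        std::abort();
    if (clear_memory)
        std::memset(new_buf, 0, new_size);
    std::memcpy(new_buf, buf, std::min(old_size, new_size));
    std::free(buf);
    return new_buf;
}

int main()
{
    void * p = std::malloc(64);
    p = simpleRealloc(p, 64, 256, MALLOC_MIN_ALIGNMENT, /*clear_memory=*/true);
    std::printf("reallocated to 256 bytes at %p\n", p);
    std::free(p);
}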
@ -205,83 +152,42 @@ protected:
static constexpr bool clear_memory = clear_memory_;
// Freshly mmapped pages are copy-on-write references to a global zero page.
// On the first write, a page fault occurs, and an actual writable page is
// allocated. If we are going to use this memory soon, such as when resizing
// hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
size_t mmap_min_alignment = ::getPageSize();
if (size >= MMAP_THRESHOLD)
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if (alignment > mmap_min_alignment)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size));
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (populate)
prefaultPages(buf, size);
return buf;
}
void freeNoTrack(void * buf, size_t size)
void freeNoTrack(void * buf)
{
if (size >= MMAP_THRESHOLD)
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
::free(buf);
}
::free(buf);
}
void checkSize(size_t size)
@ -291,20 +197,32 @@ private:
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
/// default. This may lead to worse TLB performance.
void * getMmapHint()
/// Address passed to madvise is required to be aligned to the page boundary.
auto adjustToPageSize(void * buf, size_t len, size_t page_size)
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
const uintptr_t address_numeric = reinterpret_cast<uintptr_t>(buf);
const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size;
return std::make_pair(reinterpret_cast<void *>(next_page_start), len - (next_page_start - address_numeric));
}
#else
void * getMmapHint()
void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
{
return nullptr;
}
#if defined(MADV_POPULATE_WRITE)
if (len_ < POPULATE_THRESHOLD)
return;
static const size_t page_size = ::getPageSize();
if (len_ < page_size) /// Rounded address should be still within [buf, buf + len).
return;
auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
LOG_TRACE(
LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
"Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
errnoToString(res));
#endif
}
};

View File

@ -3,7 +3,7 @@
* This file provides forward declarations for Allocator.
*/
template <bool clear_memory_, bool mmap_populate = false>
template <bool clear_memory_, bool populate = false>
class Allocator;
template <typename Base, size_t N = 64, size_t Alignment = 1>

View File

@ -57,7 +57,7 @@ ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCou
*waiter = this;
}
// Grant single slot to allocation, returns true iff more slot(s) are required
// Grant a single slot to the allocation; returns true iff more slot(s) are required
bool ConcurrencyControl::Allocation::grant()
{
std::unique_lock lock{mutex};

View File

@ -175,8 +175,6 @@
M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \

View File

@ -97,6 +97,14 @@ ThreadGroupPtr CurrentThread::getGroup()
return current_thread->getThreadGroup();
}
ContextPtr CurrentThread::getQueryContext()
{
if (unlikely(!current_thread))
return {};
return current_thread->getQueryContext();
}
std::string_view CurrentThread::getQueryId()
{
if (unlikely(!current_thread))

View File

@ -86,6 +86,10 @@ public:
static void finalizePerformanceCounters();
/// Returns a non-empty string if the thread is attached to a query
/// Returns attached query context
static ContextPtr getQueryContext();
static std::string_view getQueryId();
/// Initializes query with current thread as master thread in constructor, and detaches it in destructor

View File

@ -71,14 +71,14 @@ private:
// Same as above but select different function overloads for zero saturation.
STRONG_TYPEDEF(UInt32, LUTIndexWithSaturation)
static inline LUTIndex normalizeLUTIndex(UInt32 index)
static LUTIndex normalizeLUTIndex(UInt32 index)
{
if (index >= DATE_LUT_SIZE)
return LUTIndex(DATE_LUT_SIZE - 1);
return LUTIndex{index};
}
static inline LUTIndex normalizeLUTIndex(Int64 index)
static LUTIndex normalizeLUTIndex(Int64 index)
{
if (unlikely(index < 0))
return LUTIndex(0);
@ -88,59 +88,59 @@ private:
}
template <typename T>
friend inline LUTIndex operator+(const LUTIndex & index, const T v)
friend LUTIndex operator+(const LUTIndex & index, const T v)
{
return normalizeLUTIndex(index.toUnderType() + UInt32(v));
}
template <typename T>
friend inline LUTIndex operator+(const T v, const LUTIndex & index)
friend LUTIndex operator+(const T v, const LUTIndex & index)
{
return normalizeLUTIndex(static_cast<Int64>(v + index.toUnderType()));
}
friend inline LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
friend LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
{
return normalizeLUTIndex(static_cast<UInt32>(index.toUnderType() + v.toUnderType()));
}
template <typename T>
friend inline LUTIndex operator-(const LUTIndex & index, const T v)
friend LUTIndex operator-(const LUTIndex & index, const T v)
{
return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - UInt32(v)));
}
template <typename T>
friend inline LUTIndex operator-(const T v, const LUTIndex & index)
friend LUTIndex operator-(const T v, const LUTIndex & index)
{
return normalizeLUTIndex(static_cast<Int64>(v - index.toUnderType()));
}
friend inline LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
friend LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
{
return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - v.toUnderType()));
}
template <typename T>
friend inline LUTIndex operator*(const LUTIndex & index, const T v)
friend LUTIndex operator*(const LUTIndex & index, const T v)
{
return normalizeLUTIndex(index.toUnderType() * UInt32(v));
}
template <typename T>
friend inline LUTIndex operator*(const T v, const LUTIndex & index)
friend LUTIndex operator*(const T v, const LUTIndex & index)
{
return normalizeLUTIndex(v * index.toUnderType());
}
template <typename T>
friend inline LUTIndex operator/(const LUTIndex & index, const T v)
friend LUTIndex operator/(const LUTIndex & index, const T v)
{
return normalizeLUTIndex(index.toUnderType() / UInt32(v));
}
template <typename T>
friend inline LUTIndex operator/(const T v, const LUTIndex & index)
friend LUTIndex operator/(const T v, const LUTIndex & index)
{
return normalizeLUTIndex(UInt32(v) / index.toUnderType());
}
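The changes in this date LUT header are mechanical: the inline keyword is dropped from member and friend functions that are defined inside the class body, where they are already implicitly inline, so linkage and codegen are unaffected. A two-line illustration of that rule, unrelated to the LUT class itself:

struct Example
{
    int implicitly_inline() const { return 1; }           /// defined in-class: implicitly inline
    inline int redundant_keyword() const { return 2; }    /// same meaning; the keyword is redundant
};

int main() { return Example{}.implicitly_inline() + Example{}.redundant_keyword() - 3; }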
@ -172,12 +172,12 @@ public:
Int8 amount_of_offset_change_value; /// Usually -4 or 4, but look at Lord Howe Island. Multiply by OffsetChangeFactor
UInt8 time_at_offset_change_value; /// In seconds from beginning of the day. Multiply by OffsetChangeFactor
inline Int32 amount_of_offset_change() const /// NOLINT
Int32 amount_of_offset_change() const /// NOLINT
{
return static_cast<Int32>(amount_of_offset_change_value) * OffsetChangeFactor;
}
inline UInt32 time_at_offset_change() const /// NOLINT
UInt32 time_at_offset_change() const /// NOLINT
{
return static_cast<UInt32>(time_at_offset_change_value) * OffsetChangeFactor;
}
@ -221,7 +221,7 @@ private:
/// Time zone name.
std::string time_zone;
inline LUTIndex findIndex(Time t) const
LUTIndex findIndex(Time t) const
{
/// First guess.
Time guess = (t / 86400) + daynum_offset_epoch;
@ -248,34 +248,34 @@ private:
return LUTIndex(guess ? static_cast<unsigned>(guess) - 1 : 0);
}
static inline LUTIndex toLUTIndex(DayNum d)
static LUTIndex toLUTIndex(DayNum d)
{
return normalizeLUTIndex(d + daynum_offset_epoch);
}
static inline LUTIndex toLUTIndex(ExtendedDayNum d)
static LUTIndex toLUTIndex(ExtendedDayNum d)
{
return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
}
inline LUTIndex toLUTIndex(Time t) const
LUTIndex toLUTIndex(Time t) const
{
return findIndex(t);
}
static inline LUTIndex toLUTIndex(LUTIndex i)
static LUTIndex toLUTIndex(LUTIndex i)
{
return i;
}
template <typename DateOrTime>
inline const Values & find(DateOrTime v) const
const Values & find(DateOrTime v) const
{
return lut[toLUTIndex(v)];
}
template <typename DateOrTime, typename Divisor>
inline DateOrTime roundDown(DateOrTime x, Divisor divisor) const
DateOrTime roundDown(DateOrTime x, Divisor divisor) const
{
static_assert(std::is_integral_v<DateOrTime> && std::is_integral_v<Divisor>);
assert(divisor > 0);
@ -336,7 +336,7 @@ public:
}
template <typename DateOrTime>
inline auto toDayNum(DateOrTime v) const
auto toDayNum(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return DayNum{static_cast<DayNum::UnderlyingType>(saturateMinus(toLUTIndex(v).toUnderType(), daynum_offset_epoch))};
@ -346,7 +346,7 @@ public:
/// Round down to start of monday.
template <typename DateOrTime>
inline Time toFirstDayOfWeek(DateOrTime v) const
Time toFirstDayOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -356,7 +356,7 @@ public:
}
template <typename DateOrTime>
inline auto toFirstDayNumOfWeek(DateOrTime v) const
auto toFirstDayNumOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -367,7 +367,7 @@ public:
/// Round up to the last day of week.
template <typename DateOrTime>
inline Time toLastDayOfWeek(DateOrTime v) const
Time toLastDayOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -377,7 +377,7 @@ public:
}
template <typename DateOrTime>
inline auto toLastDayNumOfWeek(DateOrTime v) const
auto toLastDayNumOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -388,7 +388,7 @@ public:
/// Round down to start of month.
template <typename DateOrTime>
inline Time toFirstDayOfMonth(DateOrTime v) const
Time toFirstDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -398,7 +398,7 @@ public:
}
template <typename DateOrTime>
inline auto toFirstDayNumOfMonth(DateOrTime v) const
auto toFirstDayNumOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -409,7 +409,7 @@ public:
/// Round up to last day of month.
template <typename DateOrTime>
inline Time toLastDayOfMonth(DateOrTime v) const
Time toLastDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -419,7 +419,7 @@ public:
}
template <typename DateOrTime>
inline auto toLastDayNumOfMonth(DateOrTime v) const
auto toLastDayNumOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -430,7 +430,7 @@ public:
/// Round down to start of quarter.
template <typename DateOrTime>
inline auto toFirstDayNumOfQuarter(DateOrTime v) const
auto toFirstDayNumOfQuarter(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayOfQuarterIndex(v)));
@ -439,7 +439,7 @@ public:
}
template <typename DateOrTime>
inline LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
{
LUTIndex index = toLUTIndex(v);
size_t month_inside_quarter = (lut[index].month - 1) % 3;
@ -455,25 +455,25 @@ public:
}
template <typename DateOrTime>
inline Time toFirstDayOfQuarter(DateOrTime v) const
Time toFirstDayOfQuarter(DateOrTime v) const
{
return toDate(toFirstDayOfQuarterIndex(v));
}
/// Round down to start of year.
inline Time toFirstDayOfYear(Time t) const
Time toFirstDayOfYear(Time t) const
{
return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date;
}
template <typename DateOrTime>
inline LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
{
return years_lut[lut[toLUTIndex(v)].year - DATE_LUT_MIN_YEAR];
}
template <typename DateOrTime>
inline auto toFirstDayNumOfYear(DateOrTime v) const
auto toFirstDayNumOfYear(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfYearIndex(v)));
@ -481,14 +481,14 @@ public:
return toDayNum(LUTIndex(toFirstDayNumOfYearIndex(v)));
}
inline Time toFirstDayOfNextMonth(Time t) const
Time toFirstDayOfNextMonth(Time t) const
{
LUTIndex index = findIndex(t);
index += 32 - lut[index].day_of_month;
return lut[index - (lut[index].day_of_month - 1)].date;
}
inline Time toFirstDayOfPrevMonth(Time t) const
Time toFirstDayOfPrevMonth(Time t) const
{
LUTIndex index = findIndex(t);
index -= lut[index].day_of_month;
@ -496,13 +496,13 @@ public:
}
template <typename DateOrTime>
inline UInt8 daysInMonth(DateOrTime value) const
UInt8 daysInMonth(DateOrTime value) const
{
const LUTIndex i = toLUTIndex(value);
return lut[i].days_in_month;
}
inline UInt8 daysInMonth(Int16 year, UInt8 month) const
UInt8 daysInMonth(Int16 year, UInt8 month) const
{
UInt16 idx = year - DATE_LUT_MIN_YEAR;
if (unlikely(idx >= DATE_LUT_YEARS))
@ -515,12 +515,12 @@ public:
/** Round to start of day, then shift for specified amount of days.
*/
inline Time toDateAndShift(Time t, Int32 days) const
Time toDateAndShift(Time t, Int32 days) const
{
return lut[findIndex(t) + days].date;
}
inline Time toTime(Time t) const
Time toTime(Time t) const
{
const LUTIndex index = findIndex(t);
@ -532,7 +532,7 @@ public:
return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time.
}
inline unsigned toHour(Time t) const
unsigned toHour(Time t) const
{
const LUTIndex index = findIndex(t);
@ -552,7 +552,7 @@ public:
* then subtract the former from the latter to get the offset result.
* The boundaries when meets DST(daylight saving time) change should be handled very carefully.
*/
inline Time timezoneOffset(Time t) const
Time timezoneOffset(Time t) const
{
const LUTIndex index = findIndex(t);
@ -574,7 +574,7 @@ public:
}
inline unsigned toSecond(Time t) const
unsigned toSecond(Time t) const
{
if (likely(offset_is_whole_number_of_minutes_during_epoch))
{
@ -593,7 +593,7 @@ public:
return time % 60;
}
inline unsigned toMinute(Time t) const
unsigned toMinute(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return (t / 60) % 60;
@ -630,11 +630,11 @@ public:
* because the same calendar day starts/ends at different timestamps in different time zones)
*/
inline Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
template <typename DateOrTime>
inline Time toDate(DateOrTime v) const
Time toDate(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return lut_saturated[toLUTIndex(v)].date;
@ -643,20 +643,20 @@ public:
}
template <typename DateOrTime>
inline UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
template <typename DateOrTime>
inline UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
template <typename DateOrTime>
inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
/// 1-based, starts on Monday
template <typename DateOrTime>
inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
template <typename DateOrTime>
inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
{
WeekDayMode mode = check_week_day_mode(week_day_mode);
@ -674,10 +674,10 @@ public:
}
template <typename DateOrTime>
inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
template <typename DateOrTime>
inline UInt16 toDayOfYear(DateOrTime v) const
UInt16 toDayOfYear(DateOrTime v) const
{
// TODO: different overload for ExtendedDayNum
const LUTIndex i = toLUTIndex(v);
@ -688,7 +688,7 @@ public:
/// (round down to monday and divide DayNum by 7; we made an assumption,
/// that in domain of the function there was no weeks with any other number of days than 7)
template <typename DateOrTime>
inline Int32 toRelativeWeekNum(DateOrTime v) const
Int32 toRelativeWeekNum(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
/// We add 8 to avoid underflow at beginning of unix epoch.
@ -697,7 +697,7 @@ public:
/// Get year that contains most of the current week. Week begins at monday.
template <typename DateOrTime>
inline Int16 toISOYear(DateOrTime v) const
Int16 toISOYear(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
/// That's effectively the year of thursday of current week.
@ -708,7 +708,7 @@ public:
/// Example: ISO year 2019 begins at 2018-12-31. And ISO year 2017 begins at 2017-01-02.
/// https://en.wikipedia.org/wiki/ISO_week_date
template <typename DateOrTime>
inline LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
auto iso_year = toISOYear(i);
@ -722,7 +722,7 @@ public:
}
template <typename DateOrTime>
inline auto toFirstDayNumOfISOYear(DateOrTime v) const
auto toFirstDayNumOfISOYear(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfISOYearIndex(v)));
@ -730,7 +730,7 @@ public:
return toDayNum(LUTIndex(toFirstDayNumOfISOYearIndex(v)));
}
inline Time toFirstDayOfISOYear(Time t) const
Time toFirstDayOfISOYear(Time t) const
{
return lut[toFirstDayNumOfISOYearIndex(t)].date;
}
@ -738,7 +738,7 @@ public:
/// ISO 8601 week number. Week begins at monday.
/// The week number 1 is the first week in year that contains 4 or more days (that's more than half).
template <typename DateOrTime>
inline UInt8 toISOWeek(DateOrTime v) const
UInt8 toISOWeek(DateOrTime v) const
{
return 1 + (toFirstDayNumOfWeek(v) - toDayNum(toFirstDayNumOfISOYearIndex(v))) / 7;
}
@ -777,7 +777,7 @@ public:
next week is week 1.
*/
template <typename DateOrTime>
inline YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
{
const bool newyear_day_mode = week_mode & static_cast<UInt8>(WeekModeFlag::NEWYEAR_DAY);
week_mode = check_week_mode(week_mode);
@ -836,7 +836,7 @@ public:
/// Calculate week number of WeekModeFlag::NEWYEAR_DAY mode
/// The week number 1 is the first week in year that contains January 1,
template <typename DateOrTime>
inline YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
{
YearWeek yw(0, 0);
UInt16 offset_day = monday_first_mode ? 0U : 1U;
@ -870,7 +870,7 @@ public:
/// Get first day of week with week_mode, return Sunday or Monday
template <typename DateOrTime>
inline auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
{
bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
if (monday_first_mode)
@ -889,7 +889,7 @@ public:
/// Get last day of week with week_mode, return Saturday or Sunday
template <typename DateOrTime>
inline auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
{
bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
if (monday_first_mode)
@ -908,7 +908,7 @@ public:
}
/// Check and change mode to effective.
inline UInt8 check_week_mode(UInt8 mode) const /// NOLINT
UInt8 check_week_mode(UInt8 mode) const /// NOLINT
{
UInt8 week_format = (mode & 7);
if (!(week_format & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST)))
@ -917,7 +917,7 @@ public:
}
/// Check and change mode to effective.
inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
{
return static_cast<WeekDayMode>(mode & 3);
}
@ -926,7 +926,7 @@ public:
* Returns 0 for monday, 1 for tuesday...
*/
template <typename DateOrTime>
inline UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
{
const LUTIndex i = toLUTIndex(v);
if (!sunday_first_day_of_week)
@ -936,28 +936,28 @@ public:
}
/// Calculate days in one year.
inline UInt16 calc_days_in_year(Int32 year) const /// NOLINT
UInt16 calc_days_in_year(Int32 year) const /// NOLINT
{
return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
}
/// Number of month from some fixed moment in the past (year * 12 + month)
template <typename DateOrTime>
inline Int32 toRelativeMonthNum(DateOrTime v) const
Int32 toRelativeMonthNum(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i].year * 12 + lut[i].month;
}
template <typename DateOrTime>
inline Int32 toRelativeQuarterNum(DateOrTime v) const
Int32 toRelativeQuarterNum(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i].year * 4 + (lut[i].month - 1) / 3;
}
/// We count all hour-length intervals, unrelated to offset changes.
inline Time toRelativeHourNum(Time t) const
Time toRelativeHourNum(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 3600;
@ -968,37 +968,37 @@ public:
}
template <typename DateOrTime>
inline Time toRelativeHourNum(DateOrTime v) const
Time toRelativeHourNum(DateOrTime v) const
{
return toRelativeHourNum(lut[toLUTIndex(v)].date);
}
/// The same formula is used for positive time (after Unix epoch) and negative time (before Unix epoch).
/// Its needed for correct work of dateDiff function.
inline Time toStableRelativeHourNum(Time t) const
Time toStableRelativeHourNum(Time t) const
{
return (t + DATE_LUT_ADD + 86400 - offset_at_start_of_epoch) / 3600 - (DATE_LUT_ADD / 3600);
}
template <typename DateOrTime>
inline Time toStableRelativeHourNum(DateOrTime v) const
Time toStableRelativeHourNum(DateOrTime v) const
{
return toStableRelativeHourNum(lut[toLUTIndex(v)].date);
}
inline Time toRelativeMinuteNum(Time t) const /// NOLINT
Time toRelativeMinuteNum(Time t) const /// NOLINT
{
return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60);
}
template <typename DateOrTime>
inline Time toRelativeMinuteNum(DateOrTime v) const
Time toRelativeMinuteNum(DateOrTime v) const
{
return toRelativeMinuteNum(lut[toLUTIndex(v)].date);
}
template <typename DateOrTime>
inline auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
{
if (years == 1)
return toFirstDayNumOfYear(v);
@ -1019,7 +1019,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
{
if (quarters == 1)
return toFirstDayNumOfQuarter(d);
@ -1028,7 +1028,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline auto toStartOfMonthInterval(Date d, UInt64 months) const
auto toStartOfMonthInterval(Date d, UInt64 months) const
{
if (months == 1)
return toFirstDayNumOfMonth(d);
@ -1042,7 +1042,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline auto toStartOfWeekInterval(Date d, UInt64 weeks) const
auto toStartOfWeekInterval(Date d, UInt64 weeks) const
{
if (weeks == 1)
return toFirstDayNumOfWeek(d);
@ -1056,7 +1056,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline Time toStartOfDayInterval(Date d, UInt64 days) const
Time toStartOfDayInterval(Date d, UInt64 days) const
{
if (days == 1)
return toDate(d);
@ -1152,7 +1152,7 @@ public:
return static_cast<DateOrTime>(roundDown(t, seconds));
}
inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
{
if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
return LUTIndex(0);
@ -1167,7 +1167,7 @@ public:
}
/// Create DayNum from year, month, day of month.
inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
{
if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
return ExtendedDayNum(default_error_day_num);
@ -1175,14 +1175,14 @@ public:
return toDayNum(makeLUTIndex(year, month, day_of_month));
}
inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
{
return lut[makeLUTIndex(year, month, day_of_month)].date;
}
/** Does not accept daylight saving time as argument: in case of ambiguity, it chooses the greater timestamp.
*/
inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
{
size_t index = makeLUTIndex(year, month, day_of_month);
Time time_offset = hour * 3600 + minute * 60 + second;
@ -1194,28 +1194,28 @@ public:
}
template <typename DateOrTime>
inline const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }
const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }
template <typename DateOrTime>
inline UInt32 toNumYYYYMM(DateOrTime v) const
UInt32 toNumYYYYMM(DateOrTime v) const
{
const Values & values = getValues(v);
return values.year * 100 + values.month;
}
template <typename DateOrTime>
inline UInt32 toNumYYYYMMDD(DateOrTime v) const
UInt32 toNumYYYYMMDD(DateOrTime v) const
{
const Values & values = getValues(v);
return values.year * 10000 + values.month * 100 + values.day_of_month;
}
inline Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
{
return makeDate(num / 10000, num / 100 % 100, num % 100);
}
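/// Worked example (illustrative, not part of the original change): for num = 20230918,
/// num / 10000 == 2023 (year), num / 100 % 100 == 9 (month) and num % 100 == 18 (day),
/// so YYYYMMDDToDate(20230918) == makeDate(2023, 9, 18).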
inline ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
{
return makeDayNum(num / 10000, num / 100 % 100, num % 100);
}
@ -1241,13 +1241,13 @@ public:
TimeComponents time;
};
inline DateComponents toDateComponents(Time t) const
DateComponents toDateComponents(Time t) const
{
const Values & values = getValues(t);
return { values.year, values.month, values.day_of_month };
}
inline DateTimeComponents toDateTimeComponents(Time t) const
DateTimeComponents toDateTimeComponents(Time t) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
@ -1283,12 +1283,12 @@ public:
}
template <typename DateOrTime>
inline DateTimeComponents toDateTimeComponents(DateOrTime v) const
DateTimeComponents toDateTimeComponents(DateOrTime v) const
{
return toDateTimeComponents(lut[toLUTIndex(v)].date);
}
inline UInt64 toNumYYYYMMDDhhmmss(Time t) const
UInt64 toNumYYYYMMDDhhmmss(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1301,7 +1301,7 @@ public:
+ UInt64(components.date.year) * 10000000000;
}
inline Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
{
return makeDateTime(
num / 10000000000,
@ -1315,7 +1315,7 @@ public:
/// Adding calendar intervals.
/// Implementation specific behaviour when delta is too big.
inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
@ -1332,12 +1332,12 @@ public:
return lut[new_index].date + time;
}
inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
{
return addDays(t, delta * 7);
}
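/// Illustrative note (not part of the original change): addDays/addWeeks shift by whole
/// calendar days through the LUT and re-attach the original intra-day offset, so e.g.
/// addWeeks(makeDateTime(2023, 9, 18, 12, 0, 0), 1) corresponds to 2023-09-25 12:00:00
/// in the same time zone, modulo the DST adjustment handled inside addDays.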
inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
{
if (likely(day_of_month <= 28))
return day_of_month;
@ -1351,7 +1351,7 @@ public:
}
template <typename DateOrTime>
inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
{
const Values & values = lut[toLUTIndex(v)];
@ -1375,11 +1375,11 @@ public:
}
}
/// If resulting month has less deys than source month, then saturation can happen.
/// If resulting month has less days than source month, then saturation can happen.
/// Example: 31 Aug + 1 month = 30 Sep.
template <typename DateTime>
requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
inline Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
{
const auto result_day = addMonthsIndex(t, delta);
@ -1405,7 +1405,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
{
if constexpr (std::is_same_v<Date, DayNum>)
return toDayNum(LUTIndexWithSaturation(addMonthsIndex(d, delta)));
@ -1414,13 +1414,13 @@ public:
}
template <typename DateOrTime>
inline auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
{
return addMonths(d, delta * 3);
}
template <typename DateOrTime>
inline LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
{
const Values & values = lut[toLUTIndex(v)];
@ -1438,7 +1438,7 @@ public:
/// Saturation can occur if 29 Feb is mapped to non-leap year.
template <typename DateTime>
requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
inline Time addYears(DateTime t, Int64 delta) const
Time addYears(DateTime t, Int64 delta) const
{
auto result_day = addYearsIndex(t, delta);
@ -1464,7 +1464,7 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
inline auto addYears(Date d, Int64 delta) const
auto addYears(Date d, Int64 delta) const
{
if constexpr (std::is_same_v<Date, DayNum>)
return toDayNum(LUTIndexWithSaturation(addYearsIndex(d, delta)));
@ -1473,7 +1473,7 @@ public:
}
inline std::string timeToString(Time t) const
std::string timeToString(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1498,7 +1498,7 @@ public:
return s;
}
inline std::string dateToString(Time t) const
std::string dateToString(Time t) const
{
const Values & values = getValues(t);
@ -1516,7 +1516,7 @@ public:
return s;
}
inline std::string dateToString(ExtendedDayNum d) const
std::string dateToString(ExtendedDayNum d) const
{
const Values & values = getValues(d);

View File

@ -7,11 +7,19 @@
#include <filesystem>
#include <format>
#include <map>
#include <re2/re2.h>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/replace.hpp>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace fs = std::filesystem;
namespace DB

View File

@ -2,7 +2,14 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{

View File

@ -8,7 +8,7 @@
* table, so it makes sense to pre-fault the pages so that page faults don't
* interrupt the resize loop. Set the allocator parameter accordingly.
*/
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
using HashTableAllocator = Allocator<true /* clear_memory */, true /* populate */>;
template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

View File

@ -8,6 +8,9 @@
# include <base/defines.h>
# include <simdjson.h>
# include "ElementTypes.h"
# include <Common/PODArray_fwd.h>
# include <Common/PODArray.h>
# include <charconv>
namespace DB
{
@ -16,6 +19,254 @@ namespace ErrorCodes
extern const int CANNOT_ALLOCATE_MEMORY;
}
/// Format elements of basic types into a string.
/// The original implementation is mini_formatter in simdjson.h, but it is not a public API, so we
/// add an implementation here.
class SimdJSONBasicFormatter
{
public:
explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
inline void comma() { oneChar(','); }
/** Start an array, prints [ **/
inline void startArray() { oneChar('['); }
/** End an array, prints ] **/
inline void endArray() { oneChar(']'); }
/** Start an object, prints { **/
inline void startObject() { oneChar('{'); }
/** End an object, prints } **/
inline void endObject() { oneChar('}'); }
/** Prints a true **/
inline void trueAtom()
{
const char * s = "true";
buffer.insert(s, s + 4);
}
/** Prints a false **/
inline void falseAtom()
{
const char * s = "false";
buffer.insert(s, s + 5);
}
/** Prints a null **/
inline void nullAtom()
{
const char * s = "null";
buffer.insert(s, s + 4);
}
/** Prints a number **/
inline void number(int64_t x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a number **/
inline void number(uint64_t x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a number **/
inline void number(double x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a key (string + colon) **/
inline void key(std::string_view unescaped)
{
string(unescaped);
oneChar(':');
}
/** Prints a string. The string is escaped as needed. **/
inline void string(std::string_view unescaped)
{
oneChar('\"');
size_t i = 0;
// Fast path for the case where we have no control character, no ", and no backslash.
// This should include most keys.
//
// We would like to use 'bool' but some compilers take offense to bitwise operations
// with bool types.
constexpr static char needs_escaping[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
for (; i + 8 <= unescaped.length(); i += 8)
{
// Poor man's vectorization. This could get much faster if we used SIMD.
//
// It is not the case that replacing '|' with '||' would be neutral performance-wise.
if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
| needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
| needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
| needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])])
{
break;
}
}
for (; i < unescaped.length(); i++)
{
if (needs_escaping[uint8_t(unescaped[i])])
{
break;
}
}
// The following is also possible and omits a 256-byte table, but it is slower:
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
// At least for long strings, the following should be fast. We could
// do better by integrating the checks and the insertion.
buffer.insert(unescaped.data(), unescaped.data() + i);
// We caught a control character if we enter this loop (slow).
// Note that we do not restart from the beginning, but rather we continue
// from the point where we encountered something that requires escaping.
for (; i < unescaped.length(); i++)
{
switch (unescaped[i])
{
case '\"': {
const char * s = "\\\"";
buffer.insert(s, s + 2);
}
break;
case '\\': {
const char * s = "\\\\";
buffer.insert(s, s + 2);
}
break;
default:
if (uint8_t(unescaped[i]) <= 0x1F)
{
// If packed, this uses 8 * 32 bytes.
// Note that we expect most compilers to embed this code in the data
// section.
constexpr static simdjson::escape_sequence escaped[32] = {
{6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"},
{6, "\\u0007"}, {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, {2, "\\f"}, {2, "\\r"},
{6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"},
{6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
{6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
auto u = escaped[uint8_t(unescaped[i])];
buffer.insert(u.string, u.string + u.length);
}
else
{
oneChar(unescaped[i]);
}
} // switch
} // for
oneChar('\"');
}
inline void oneChar(char c)
{
buffer.push_back(c);
}
private:
PaddedPODArray<UInt8> & buffer;
};
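/// A minimal usage sketch (illustrative only, not part of the original change). The formatter
/// appends raw JSON bytes to the PaddedPODArray passed at construction:
///
///     PaddedPODArray<UInt8> out;
///     SimdJSONBasicFormatter formatter(out);
///     formatter.startObject();
///     formatter.key("name");          /// writes "name":
///     formatter.string("a\"b");       /// escaped to "a\"b"
///     formatter.comma();
///     formatter.key("count");
///     formatter.number(int64_t(42));
///     formatter.endObject();          /// out now holds {"name":"a\"b","count":42}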
/// Format JSON values into a string: element, array, object, key-value pair.
/// Similar to string_builder in simdjson.h.
class SimdJSONElementFormatter
{
public:
explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
/** Append an element to the builder (to be printed) **/
inline void append(simdjson::dom::element value)
{
switch (value.type())
{
case simdjson::dom::element_type::UINT64: {
format.number(value.get_uint64().value_unsafe());
break;
}
case simdjson::dom::element_type::INT64: {
format.number(value.get_int64().value_unsafe());
break;
}
case simdjson::dom::element_type::DOUBLE: {
format.number(value.get_double().value_unsafe());
break;
}
case simdjson::dom::element_type::STRING: {
format.string(value.get_string().value_unsafe());
break;
}
case simdjson::dom::element_type::BOOL: {
if (value.get_bool().value_unsafe())
format.trueAtom();
else
format.falseAtom();
break;
}
case simdjson::dom::element_type::NULL_VALUE: {
format.nullAtom();
break;
}
case simdjson::dom::element_type::ARRAY: {
append(value.get_array().value_unsafe());
break;
}
case simdjson::dom::element_type::OBJECT: {
append(value.get_object().value_unsafe());
break;
}
}
}
/** Append an array to the builder (to be printed) **/
inline void append(simdjson::dom::array value)
{
format.startArray();
auto iter = value.begin();
auto end = value.end();
if (iter != end)
{
append(*iter);
for (++iter; iter != end; ++iter)
{
format.comma();
append(*iter);
}
}
format.endArray();
}
inline void append(simdjson::dom::object value)
{
format.startObject();
auto pair = value.begin();
auto end = value.end();
if (pair != end)
{
append(*pair);
for (++pair; pair != end; ++pair)
{
format.comma();
append(*pair);
}
}
format.endObject();
}
inline void append(simdjson::dom::key_value_pair kv)
{
format.key(kv.key);
append(kv.value);
}
private:
SimdJSONBasicFormatter format;
};
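/// A minimal usage sketch (illustrative only, not part of the original change), assuming a
/// successful simdjson DOM parse; it re-serializes the parsed element into the buffer:
///
///     simdjson::dom::parser parser;
///     simdjson::dom::element doc = parser.parse(std::string(R"({"a":[1,2,3]})"));
///     PaddedPODArray<UInt8> out;
///     SimdJSONElementFormatter element_formatter(out);
///     element_formatter.append(doc);  /// out now holds {"a":[1,2,3]}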
/// This class can be used as an argument for the template class FunctionJSON.
/// It provides the ability to parse JSONs using the simdjson library.
struct SimdJSONParser

View File

@ -441,8 +441,7 @@ finish:
}
}
template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
void OptimizedRegularExpression::analyze(
std::string_view regexp_,
std::string & required_substring,
bool & is_trivial,
@ -467,8 +466,7 @@ catch (...)
LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false));
}
template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regexp_, int options)
{
std::vector<std::string> alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);
@ -486,7 +484,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
if (!is_trivial)
{
/// Compile the re2 regular expression.
typename RegexType::Options regexp_options;
typename re2::RE2::Options regexp_options;
/// Never write error messages to stderr. It's ignorant to do it from library code.
regexp_options.set_log_errors(false);
@ -497,7 +495,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
if (is_dot_nl)
regexp_options.set_dot_nl(true);
re2 = std::make_unique<RegexType>(regexp_, regexp_options);
re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
if (!re2->ok())
{
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
@ -527,8 +525,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
}
}
template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept
OptimizedRegularExpression::OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept
: is_trivial(rhs.is_trivial)
, required_substring_is_prefix(rhs.required_substring_is_prefix)
, is_case_insensitive(rhs.is_case_insensitive)
@ -545,8 +542,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(Opti
}
}
template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size) const
{
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
const UInt8 * haystack_end = haystack + subject_size;
@ -577,13 +573,12 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
}
}
return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
return re2->Match({subject, subject_size}, 0, subject_size, re2::RE2::UNANCHORED, nullptr, 0);
}
}
template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size, Match & match) const
{
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
const UInt8 * haystack_end = haystack + subject_size;
@ -624,7 +619,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
std::string_view piece;
if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece))
if (!re2::RE2::PartialMatch({subject, subject_size}, *re2, &piece))
return false;
else
{
@ -636,8 +631,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
}
template <bool thread_safe>
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
unsigned OptimizedRegularExpression::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
{
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
const UInt8 * haystack_end = haystack + subject_size;
@ -695,7 +689,7 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
{subject, subject_size},
0,
subject_size,
RegexType::UNANCHORED,
re2::RE2::UNANCHORED,
pieces.data(),
static_cast<int>(pieces.size())))
{
@ -721,6 +715,3 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
}
}
}
template class OptimizedRegularExpressionImpl<true>;
template class OptimizedRegularExpressionImpl<false>;

View File

@ -6,9 +6,15 @@
#include <optional>
#include <Common/StringSearcher.h>
#include "config.h"
#include <re2/re2.h>
#include <re2_st/re2.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
/** Uses two ways to optimize a regular expression:
* 1. If the regular expression is trivial (reduces to finding a substring in a string),
@ -37,8 +43,7 @@ namespace OptimizedRegularExpressionDetails
};
}
template <bool thread_safe>
class OptimizedRegularExpressionImpl
class OptimizedRegularExpression
{
public:
enum Options
@ -51,12 +56,10 @@ public:
using Match = OptimizedRegularExpressionDetails::Match;
using MatchVec = std::vector<Match>;
using RegexType = std::conditional_t<thread_safe, re2::RE2, re2_st::RE2>;
OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT
OptimizedRegularExpression(const std::string & regexp_, int options = 0); /// NOLINT
/// StringSearcher stores pointers to required_substring; it must be updated on move.
OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept;
OptimizedRegularExpressionImpl(const OptimizedRegularExpressionImpl & rhs) = delete;
OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept;
OptimizedRegularExpression(const OptimizedRegularExpression & rhs) = delete;
bool match(const std::string & subject) const
{
@ -85,7 +88,7 @@ public:
unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }
/// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
const std::unique_ptr<RegexType> & getRE2() const { return re2; }
const std::unique_ptr<re2::RE2> & getRE2() const { return re2; }
void getAnalyzeResult(std::string & out_required_substring, bool & out_is_trivial, bool & out_required_substring_is_prefix) const
{
@ -110,9 +113,6 @@ private:
std::string required_substring;
std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
std::unique_ptr<RegexType> re2;
std::unique_ptr<re2::RE2> re2;
unsigned number_of_subpatterns;
};
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
using OptimizedRegularExpressionSingleThreaded = OptimizedRegularExpressionImpl<false>;
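/// A minimal usage sketch of the now non-templated class (illustrative only, not part of the
/// original change):
///
///     OptimizedRegularExpression regexp("^hello.*world$");
///     bool matched = regexp.match("hello, world");    /// trivial-substring or RE2 path is chosen internally
///     OptimizedRegularExpression::MatchVec matches;
///     unsigned num_matches = regexp.match("hello, world", 12, matches, regexp.getNumberOfSubpatterns() + 1);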

View File

@ -253,6 +253,13 @@ The server successfully detected this situation and will download merged part fr
M(PolygonsAddedToPool, "A polygon has been added to the cache (pool) for the 'pointInPolygon' function.") \
M(PolygonsInPoolAllocatedBytes, "The number of bytes for polygons added to the cache (pool) for the 'pointInPolygon' function.") \
\
M(USearchAddCount, "Number of vectors added to usearch indexes.") \
M(USearchAddVisitedMembers, "Number of nodes visited when adding vectors to usearch indexes.") \
M(USearchAddComputedDistances, "Number of times distance was computed when adding vectors to usearch indexes.") \
M(USearchSearchCount, "Number of search operations performed in usearch indexes.") \
M(USearchSearchVisitedMembers, "Number of nodes visited when searching in usearch indexes.") \
M(USearchSearchComputedDistances, "Number of times distance was computed when searching usearch indexes.") \
\
M(RWLockAcquiredReadLocks, "Number of times a read lock was acquired (in a heavy RWLock).") \
M(RWLockAcquiredWriteLocks, "Number of times a write lock was acquired (in a heavy RWLock).") \
M(RWLockReadersWaitMilliseconds, "Total time spent waiting for a read lock to be acquired (in a heavy RWLock).") \

View File

@ -1,4 +1,3 @@
#include <re2/re2.h>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/RemoteHostFilter.h>
@ -6,6 +5,14 @@
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{

Some files were not shown because too many files have changed in this diff.