mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00

Merge remote-tracking branch 'origin/master' into parallel-replicas-not-enough-replicas

This commit is contained in: commit 047d214436
@@ -79,7 +79,7 @@ IndentWidth: 4
IndentWrappedFunctionNames: false
MacroBlockBegin: ''
MacroBlockEnd: ''
NamespaceIndentation: Inner
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
@@ -89,6 +89,7 @@ PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
RemoveBracesLLVM: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
.github/workflows/auto_release.yml (vendored, new file, 45 lines)
@@ -0,0 +1,45 @@
name: AutoRelease

env:
  # Force the stdout and stderr streams to be unbuffered
  PYTHONUNBUFFERED: 1

concurrency:
  group: auto-release
on:  # yamllint disable-line rule:truthy
  # schedule:
  #   - cron: '0 10-16 * * 1-5'
  workflow_dispatch:

jobs:
  CherryPick:
    runs-on: [self-hosted, style-checker-aarch64]
    steps:
      - name: Set envs
        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/cherry_pick
          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
          RCSK
          REPO_OWNER=ClickHouse
          REPO_NAME=ClickHouse
          REPO_TEAM=core
          EOF
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
          fetch-depth: 0
      - name: Auto-release
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --release-after-days=3
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
@@ -47,7 +47,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
    set (PARALLEL_LINK_JOBS 2)
endif()

message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")

if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
    set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
contrib/abseil-cpp (vendored)
@@ -1 +1 @@
Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
@@ -1,5 +1,5 @@
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(BUILD_TESTING OFF)

set(ABSL_PROPAGATE_CXX_STD ON)
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
contrib/re2 (vendored)
@@ -1 +1 @@
Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4
Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c
@@ -1,14 +1,3 @@
# Copyright 2015 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This file was edited for ClickHouse

string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space)
if(_have_space GREATER 0)
    message(FATAL_ERROR "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.")
endif()

set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2")

set(RE2_SOURCES
@@ -35,33 +24,9 @@ set(RE2_SOURCES
    ${SRC_DIR}/util/rune.cc
    ${SRC_DIR}/util/strutil.cc
)
add_library(re2 ${RE2_SOURCES})
target_include_directories(re2 PUBLIC "${SRC_DIR}")
target_link_libraries(re2 ch_contrib::abseil_str_format)

# Building re2 which is thread-safe and re2_st which is not.
# re2 changes its state during matching of regular expression, e.g. creates temporary DFA.
# It uses RWLock to process the same regular expression object from different threads.
# In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st).
add_library(_re2 ${RE2_SOURCES})
target_include_directories(_re2 PUBLIC "${SRC_DIR}")
target_link_libraries(_re2 ch_contrib::abseil_str_format)

add_library(re2_st ${RE2_SOURCES})
target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
target_include_directories (re2_st PRIVATE .)
target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR})
target_link_libraries (re2_st ch_contrib::abseil_str_format)

file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h)
    add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
        COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/re2/${FILENAME}"
            -DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
            -P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake"
        COMMENT "Creating ${FILENAME} for re2_st library.")
    add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}")
    add_dependencies (re2_st transform_${FILENAME})
endforeach ()

# NOTE: you should not change name of library here, since it is used to generate required header (see above)
add_library(ch_contrib::re2 ALIAS re2)
add_library(ch_contrib::re2_st ALIAS re2_st)
add_library(ch_contrib::re2 ALIAS _re2)
@@ -1,10 +0,0 @@
file (READ ${SOURCE_FILENAME} CONTENT)
string (REGEX REPLACE "using re2::RE2;" "" CONTENT "${CONTENT}")
string (REGEX REPLACE "using re2::LazyRE2;" "" CONTENT "${CONTENT}")
string (REGEX REPLACE "namespace re2 {" "namespace re2_st {" CONTENT "${CONTENT}")
string (REGEX REPLACE "re2::" "re2_st::" CONTENT "${CONTENT}")
string (REGEX REPLACE "\"re2/" "\"re2_st/" CONTENT "${CONTENT}")
string (REGEX REPLACE "(.\\*?_H)" "\\1_ST" CONTENT "${CONTENT}")
string (REGEX REPLACE "#define MUTEX_IS_PTHREAD_RWLOCK" "#undef MUTEX_IS_PTHREAD_RWLOCK" CONTENT "${CONTENT}")
string (REGEX REPLACE "typedef std::mutex MutexType;" "struct MutexType { void lock() {} void unlock() {} };" CONTENT "${CONTENT}")
file (WRITE ${TARGET_FILENAME} "${CONTENT}")
contrib/s2geometry (vendored)
@@ -1 +1 @@
Subproject commit 4a7ebd5da04cb6c9ea38bbf5914a9f8f3c768564
Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b
@@ -7,12 +7,6 @@ endif()

set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")

set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
    message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()

set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
    "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
@@ -58,7 +52,9 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
    "${S2_SOURCE_DIR}/s2/s2error.cc"
    "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
    "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
@@ -93,59 +89,58 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
    "${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc"
    "${S2_SOURCE_DIR}/s2/s2shape_measures.cc"
    "${S2_SOURCE_DIR}/s2/s2shape_nesting_query.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
    "${S2_SOURCE_DIR}/s2/s2text_format.cc"
    "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
    "${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
    "${S2_SOURCE_DIR}/s2/strings/serialize.cc"
    "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
    "${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
    "${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
    "${S2_SOURCE_DIR}/s2/util/coding/varint.cc"
    "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
    "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
    "${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
)

add_library(_s2 ${S2_SRCS})
add_library(ch_contrib::s2 ALIAS _s2)

set_property(TARGET _s2 PROPERTY CXX_STANDARD 17)

if (TARGET OpenSSL::SSL)
    target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()

# Copied from contrib/s2geometry/CMakeLists
target_link_libraries(_s2 PRIVATE
    absl::base
    absl::btree
    absl::config
    absl::core_headers
    absl::dynamic_annotations
    absl::endian
    absl::fixed_array
    absl::flat_hash_map
    absl::flat_hash_set
    absl::hash
    absl::inlined_vector
    absl::int128
    absl::log_severity
    absl::memory
    absl::span
    absl::str_format
    absl::strings
    absl::type_traits
    absl::utility
)
    absl::base
    absl::btree
    absl::check
    absl::config
    absl::core_headers
    absl::dynamic_annotations
    absl::endian
    absl::fixed_array
    absl::flags
    absl::flat_hash_map
    absl::flat_hash_set
    absl::hash
    absl::inlined_vector
    absl::int128
    absl::log
    absl::log_severity
    absl::memory
    absl::span
    absl::status
    absl::str_format
    absl::strings
    absl::type_traits
    absl::utility
)

target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")
contrib/sysroot (vendored)
@@ -1 +1 @@
Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb
Subproject commit b5fcabb24d28fc33024291b2c6c1abd807c7dba8

contrib/usearch (vendored)
@@ -1 +1 @@
Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba
Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f
@@ -325,7 +325,6 @@ def parse_env_variables(

    if additional_pkgs:
        # NOTE: This are the env for packages/build script
        result.append("MAKE_APK=true")
        result.append("MAKE_RPM=true")
        result.append("MAKE_TGZ=true")
@@ -18,6 +18,7 @@ RUN apt-get update \
        python3-termcolor \
        unixodbc \
        pv \
        jq \
        zstd \
        --yes --no-install-recommends
@@ -1,21 +1,15 @@
# docker build -t clickhouse/mysql-java-client .
# MySQL Java client docker container

FROM ubuntu:18.04
FROM openjdk:8-jdk-alpine

RUN apt-get update && \
    apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl
RUN apk --no-cache add curl

RUN rm -rf \
    /var/lib/apt/lists/* \
    /var/cache/debconf \
    /tmp/* \
RUN apt-get clean

ARG ver=5.1.46
RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar
ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar
ARG ver=8.1.0
RUN curl -L -o /mysql-connector-j-${ver}.jar https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/${ver}/mysql-connector-j-${ver}.jar
ENV CLASSPATH=$CLASSPATH:/mysql-connector-j-${ver}.jar

WORKDIR /jdbc
COPY Test.java Test.java
RUN javac Test.java
COPY PreparedStatementsTest.java PreparedStatementsTest.java
RUN javac Test.java PreparedStatementsTest.java
@@ -0,0 +1,193 @@
import com.mysql.cj.MysqlType;

import java.sql.*;

public class PreparedStatementsTest {
    public static void main(String[] args) {
        int i = 0;
        String host = "127.0.0.1";
        String port = "9004";
        String user = "default";
        String password = "";
        String database = "default";
        while (i < args.length) {
            switch (args[i]) {
                case "--host":
                    host = args[++i];
                    break;
                case "--port":
                    port = args[++i];
                    break;
                case "--user":
                    user = args[++i];
                    break;
                case "--password":
                    password = args[++i];
                    break;
                case "--database":
                    database = args[++i];
                    break;
                default:
                    i++;
                    break;
            }
        }

        // useServerPrepStmts uses COM_STMT_PREPARE and COM_STMT_EXECUTE
        // instead of COM_QUERY which allows us to test the binary protocol
        String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=true",
                host, port, database);

        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            Connection conn = DriverManager.getConnection(jdbcUrl, user, password);
            testSimpleDataTypes(conn);
            testStringTypes(conn);
            testLowCardinalityAndNullableTypes(conn);
            testDecimalTypes(conn);
            testMiscTypes(conn);
            testDateTypes(conn);
            testUnusualDateTime64Scales(conn);
            testDateTimeTimezones(conn);
            conn.close();
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    private static void testSimpleDataTypes(Connection conn) throws SQLException {
        System.out.println("### testSimpleDataTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_simple_data_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i8"), rs.getInt("i8"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i16"), rs.getInt("i16"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i32"), rs.getInt("i32"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i64"), rs.getLong("i64"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "i128"), rs.getString("i128"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "i256"), rs.getString("i256"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui8"), rs.getInt("ui8"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui16"), rs.getInt("ui16"));
            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui32"), rs.getLong("ui32"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui64"), rs.getString("ui64"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui128"), rs.getString("ui128"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui256"), rs.getString("ui256"));
            System.out.printf("%s, value: %f\n", getMysqlType(rs, "f32"), rs.getFloat("f32"));
            System.out.printf("%s, value: %f\n", getMysqlType(rs, "f64"), rs.getFloat("f64"));
            System.out.printf("%s, value: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b"));
        }
        System.out.println();
    }

    private static void testStringTypes(Connection conn) throws SQLException {
        System.out.println("### testStringTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_string_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "s"), rs.getString("s"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "sn"), rs.getString("sn"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "lc"), rs.getString("lc"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "nlc"), rs.getString("nlc"));
        }
        System.out.println();
    }

    private static void testLowCardinalityAndNullableTypes(Connection conn) throws SQLException {
        System.out.println("### testLowCardinalityAndNullableTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_low_cardinality_and_nullable_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ilc"), rs.getInt("ilc"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dlc"), rs.getDate("dlc"));
            // NULL int is represented as zero
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ni"), rs.getInt("ni"));
        }
        System.out.println();
    }

    private static void testDecimalTypes(Connection conn) throws SQLException {
        System.out.println("### testDecimalTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_decimal_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString());
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString());
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_native"),
                    rs.getBigDecimal("d128_native").toPlainString());
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d256"), rs.getString("d256"));
        }
        System.out.println();
    }

    private static void testDateTypes(Connection conn) throws SQLException {
        System.out.println("### testDateTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_date_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getDate("d32"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_6"), rs.getTimestamp("dt64_6"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9"));
        }
        System.out.println();
    }

    private static void testUnusualDateTime64Scales(Connection conn) throws SQLException {
        System.out.println("### testUnusualDateTime64Scales");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_unusual_datetime64_scales").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8"));
        }
        System.out.println();
    }

    private static void testDateTimeTimezones(Connection conn) throws SQLException {
        System.out.println("### testDateTimeTimezones");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_datetime_timezones").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
        }
        System.out.println();
    }

    private static void testMiscTypes(Connection conn) throws SQLException {
        System.out.println("### testMiscTypes");
        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_misc_types").executeQuery();
        int rowNum = 1;
        while (rs.next()) {
            System.out.printf("Row #%d\n", rowNum++);
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "a"), rs.getString("a"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "u"), rs.getString("u"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "t"), rs.getString("t"));
            System.out.printf("%s, value: %s\n", getMysqlType(rs, "m"), rs.getString("m"));
        }
        System.out.println();
    }

    private static String getMysqlType(ResultSet rs, String columnLabel) throws SQLException {
        ResultSetMetaData meta = rs.getMetaData();
        return String.format("%s type is %s", columnLabel,
                MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel))));
    }

}
@@ -46,6 +46,7 @@ class JavaConnectorTest {
        Connection conn = null;
        Statement stmt = null;
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            conn = DriverManager.getConnection(jdbcUrl, user, password);
            stmt = conn.createStatement();
            stmt.executeUpdate(CREATE_TABLE_SQL);
@@ -69,7 +70,7 @@ class JavaConnectorTest {

            stmt.close();
            conn.close();
        } catch (SQLException e) {
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
@@ -3,4 +3,4 @@ services:
  java1:
    image: clickhouse/mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest}
    # to keep container running
    command: sleep infinity
    command: sleep 1d
@@ -394,7 +394,7 @@ do
done

# for each query run, prepare array of metrics from query log
clickhouse-local --query "
clickhouse-local --multiquery --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
    'test text, query_index int, query_id text, version UInt8, time float');
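The `--multiquery` flag matters here: these invocations pass several ';'-separated statements in one string, and plain `--query` accepted only a single statement. A minimal sketch of the difference, with placeholder statements:

```sql
-- With --multiquery, clickhouse-local executes every ';'-separated statement
-- in a single invocation; without it, a multi-statement string is rejected.
CREATE VIEW v AS SELECT 1 AS x;
SELECT x FROM v;
```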
@@ -551,7 +551,7 @@ numactl --cpunodebind=all --membind=all numactl --show
# If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs.
numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log

clickhouse-local --query "
clickhouse-local --multiquery --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
@@ -649,7 +649,7 @@ rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.ts
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:

clickhouse-local --query "
clickhouse-local --multiquery --query "
create view query_display_names as select * from
    file('analyze/query-display-names.tsv', TSV,
        'test text, query_index int, query_display_name text')
@@ -950,7 +950,7 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
for version in {right,left}
do
    rm -rf data
    clickhouse-local --query "
    clickhouse-local --multiquery --query "
    create view query_profiles as
        with 0 as left, 1 as right
        select * from file('analyze/query-profiles.tsv', TSV,
@@ -1120,7 +1120,7 @@ function report_metrics
rm -rf metrics ||:
mkdir metrics

clickhouse-local --query "
clickhouse-local --multiquery --query "
create view right_async_metric_log as
    select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
    ;
@@ -1180,7 +1180,7 @@ function upload_results
# Prepare info for the CI checks table.
rm -f ci-checks.tsv

clickhouse-local --query "
clickhouse-local --multiquery --query "
create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes);

create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
@@ -40,6 +40,7 @@ RUN apt-get update -y \
        cargo \
        zstd \
        file \
        jq \
        pv \
        zip \
        p7zip-full \
@@ -87,5 +88,10 @@ RUN npm install -g azurite \
COPY run.sh /
COPY setup_minio.sh /
COPY setup_hdfs_minicluster.sh /
COPY attach_gdb.lib /
COPY utils.lib /

# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests
COPY stress_tests.lib /

CMD ["/bin/bash", "/run.sh"]
@@ -1,6 +1,6 @@
#!/bin/bash

source /usr/share/clickhouse-test/ci/utils.lib
source /utils.lib

function attach_gdb_to_clickhouse()
{
@@ -22,10 +22,10 @@ dpkg -i package_folder/clickhouse-client_*.deb
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test

# shellcheck disable=SC1091
source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
source /attach_gdb.lib

# shellcheck disable=SC1091
source /usr/share/clickhouse-test/ci/utils.lib || true # FIXME: to not break old builds, clean on 2023-09-01
source /utils.lib

# install test configs
/usr/share/clickhouse-test/config/install.sh
@@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test

# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
source /attach_gdb.lib
source /stress_tests.lib

install_packages package_folder
@@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre

# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
source /attach_gdb.lib
source /stress_tests.lib

azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
@@ -60,6 +60,12 @@ install_packages previous_release_package_folder
# available for dump via clickhouse-local
configure

# async_replication setting doesn't exist on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
    | sed "/<async_replication>1<\/async_replication>/d" \
    > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
@@ -82,6 +88,12 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
    > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

# async_replication setting doesn't exist on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
    | sed "/<async_replication>1<\/async_replication>/d" \
    > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
    | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
@@ -37,6 +37,8 @@ When creating a new replica of the database, this replica creates tables by itse

[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part on the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
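For example (a minimal sketch of the behavior described above; the table and partition are illustrative):

```sql
-- Executed on the current replica only; if db.events uses a Replicated* table
-- engine, the attached data is then replicated to the other replicas.
ALTER TABLE db.events ATTACH PARTITION 202309;
```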
If you only need to configure a cluster without maintaining table replication, refer to the [Cluster Discovery](../../operations/cluster-discovery.md) feature.

## Usage Example {#usage-example}

Creating a cluster with three hosts:
@@ -252,7 +252,7 @@ CREATE TABLE table_with_usearch_index
(
    id Int64,
    vectors Array(Float32),
    INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
    INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
@@ -265,7 +265,7 @@ CREATE TABLE table_with_usearch_index
(
    id Int64,
    vectors Tuple(Float32[, Float32[, ...]]),
    INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
    INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
@@ -277,5 +277,8 @@ USearch currently supports two distance functions:
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).

USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
was specified during index creation, `f16` is used as default.

For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.
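For instance, to build an index with a reduced-precision scalar kind (a sketch following the syntax above; the index name and parameter values are illustrative):

```sql
CREATE TABLE table_with_usearch_index
(
    id Int64,
    vectors Array(Float32),
    -- store vector data as 16-bit floats and compare with cosine distance
    INDEX vec_idx vectors TYPE usearch('cosineDistance', 'f16') GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```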
@@ -1139,6 +1139,8 @@ Optional parameters:
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).

### Configuring the cache

@@ -1220,7 +1222,6 @@ Configuration markup:
    <account_name>account</account_name>
    <account_key>pass123</account_key>
    <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
    <cache_enabled>true</cache_enabled>
    <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
    <skip_access_check>false</skip_access_check>
</blob_storage_disk>
@@ -1248,9 +1249,9 @@ Limit parameters (mainly for internal usage):

Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).

Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
@@ -58,7 +58,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}

1
```
@@ -288,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:

``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```

Possible header fields:
@@ -439,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -604,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -644,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@@ -696,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@@ -715,7 +715,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact
docs/en/operations/cluster-discovery.md (new file, 171 lines)
@@ -0,0 +1,171 @@
---
slug: /en/operations/cluster-discovery
sidebar_label: Cluster Discovery
---
# Cluster Discovery

## Overview

ClickHouse's Cluster Discovery feature simplifies cluster configuration by allowing nodes to automatically discover and register themselves without the need for explicit definition in the configuration files. This is especially beneficial in cases where the manual definition of each node becomes cumbersome.

:::note

Cluster Discovery is an experimental feature and can be changed or removed in future versions.
To enable it include the `allow_experimental_cluster_discovery` setting in your configuration file:

```xml
<clickhouse>
    <!-- ... -->
    <allow_experimental_cluster_discovery>1</allow_experimental_cluster_discovery>
    <!-- ... -->
</clickhouse>
```
:::

## Remote Servers Configuration

### Traditional Manual Configuration

Traditionally, in ClickHouse, each shard and replica in the cluster needed to be manually specified in the configuration:

```xml
<remote_servers>
    <cluster_name>
        <shard>
            <replica>
                <host>node1</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node2</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <replica>
                <host>node3</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node4</host>
                <port>9000</port>
            </replica>
        </shard>
    </cluster_name>
</remote_servers>
```

### Using Cluster Discovery

With Cluster Discovery, rather than defining each node explicitly, you simply specify a path in ZooKeeper. All nodes that register under this path in ZooKeeper will be automatically discovered and added to the cluster.

```xml
<remote_servers>
    <cluster_name>
        <discovery>
            <path>/clickhouse/discovery/cluster_name</path>
        </discovery>
    </cluster_name>
</remote_servers>
```

If you want to specify a shard number for a particular node, you can include the `<shard>` tag within the `<discovery>` section:

for `node1` and `node2`:

```xml
<discovery>
    <path>/clickhouse/discovery/cluster_name</path>
    <shard>1</shard>
</discovery>
```

for `node3` and `node4`:

```xml
<discovery>
    <path>/clickhouse/discovery/cluster_name</path>
    <shard>2</shard>
</discovery>
```

### Observer mode

Nodes configured in observer mode will not register themselves as replicas.
They will solely observe and discover other active replicas in the cluster without actively participating.
To enable observer mode, include the `<observer/>` tag within the `<discovery>` section:

```xml
<discovery>
    <path>/clickhouse/discovery/cluster_name</path>
    <observer/>
</discovery>
```

## Use-Cases and Limitations

As nodes are added or removed from the specified ZooKeeper path, they are automatically discovered or removed from the cluster without the need for configuration changes or server restarts.

However, changes affect only cluster configuration, not the data or existing databases and tables.

Consider the following example with a cluster of 3 nodes:

```xml
<remote_servers>
    <default>
        <discovery>
            <path>/clickhouse/discovery/default_cluster</path>
        </discovery>
    </default>
</remote_servers>
```

```
SELECT * EXCEPT (default_database, errors_count, slowdowns_count, estimated_recovery_time, database_shard_name, database_replica_name)
FROM system.clusters WHERE cluster = 'default';

┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
│ default │ 1 │ 1 │ 1 │ 92d3c04025e8 │ 172.26.0.5 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 2 │ a6a68731c21b │ 172.26.0.4 │ 9000 │ 1 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 3 │ 8e62b9cb17a1 │ 172.26.0.2 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
```

```sql
CREATE TABLE event_table ON CLUSTER default (event_time DateTime, value String)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/event_table', '{replica}')
ORDER BY event_time PARTITION BY toYYYYMM(event_time);

INSERT INTO event_table ...
```

Then, we add a new node to the cluster, starting a new node with the same entry in the `remote_servers` section in a configuration file:

```
┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
│ default │ 1 │ 1 │ 1 │ 92d3c04025e8 │ 172.26.0.5 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 2 │ a6a68731c21b │ 172.26.0.4 │ 9000 │ 1 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 3 │ 8e62b9cb17a1 │ 172.26.0.2 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
│ default │ 1 │ 1 │ 4 │ b0df3669b81f │ 172.26.0.6 │ 9000 │ 0 │ │ ᴺᵁᴸᴸ │
└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
```

The fourth node is participating in the cluster, but table `event_table` still exists only on the first three nodes:

```sql
SELECT hostname(), database, table FROM clusterAllReplicas(default, system.tables) WHERE table = 'event_table' FORMAT PrettyCompactMonoBlock

┌─hostname()───┬─database─┬─table───────┐
│ a6a68731c21b │ default │ event_table │
│ 92d3c04025e8 │ default │ event_table │
│ 8e62b9cb17a1 │ default │ event_table │
└──────────────┴──────────┴─────────────┘
```

If you need to have tables replicated on all the nodes, you may use the [Replicated](../engines/database-engines/replicated.md) database engine as an alternative to cluster discovery.
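A minimal sketch of that alternative (the database name and ZooKeeper path are illustrative):

```sql
-- DDL run in a Replicated database is applied on every replica, so tables
-- exist everywhere. Cluster discovery, by contrast, only manages topology.
CREATE DATABASE db_replicated
ENGINE = Replicated('/clickhouse/databases/db_replicated', '{shard}', '{replica}');
```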
@@ -88,7 +88,7 @@ Default: 2

## background_merges_mutations_scheduling_policy

The policy for scheduling background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`.

@@ -583,7 +583,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc

Type: String

Default:

## thread_pool_queue_size

@@ -640,7 +640,7 @@ When `/disk1` is full, temporary data will be stored on `/disk2`.
```
Type: String

Default:

## uncompressed_cache_policy

@@ -1948,7 +1948,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    <max_size_rows>1048576</max_size_rows>
    <reserved_size_rows>8192</reserved_size_rows>
    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
    <flush_on_crash>false</flush_on_crash>
</query_thread_log>
```
@@ -2236,6 +2236,8 @@ For the value of the `incl` attribute, see the section “[Configuration files](
**See Also**

- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
- [Cluster Discovery](../../operations/cluster-discovery.md)
- [Replicated database engine](../../engines/database-engines/replicated.md)

## timezone {#server_configuration_parameters-timezone}

@@ -2404,7 +2406,7 @@ This section contains the following parameters:
* nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the server’s hostname.
* first_or_random - selects the first ZooKeeper node; if it's not available, randomly selects one of the remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node; if reconnection happens, selects the next.

**Example configuration**

``` xml
@@ -71,7 +71,7 @@ Possible values:

- Any positive integer.

Default value: 150.
Default value: 1000.

ClickHouse artificially executes `INSERT` longer (adds ‘sleep’) so that the background merge process can merge parts faster than they are added.
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
## partial_result_update_duration_ms
|
||||
|
||||
Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.
|
||||
|
||||
## max_rows_in_partial_result
|
||||
|
||||
Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query).
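For example (a sketch; the query and values are illustrative, and a client running in interactive mode is assumed):

```sql
-- Stream a partial top-10 every 100 ms while the ORDER BY is still running.
SELECT number
FROM numbers_mt(100000000)
ORDER BY number DESC
LIMIT 10
SETTINGS partial_result_update_duration_ms = 100, max_rows_in_partial_result = 10;
```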
## validate_tcp_client_information {#validate-tcp-client-information}

Determines whether validation of client information is enabled when a query packet is received from a client using a TCP connection.
@@ -4659,3 +4667,44 @@ The default value is `false`.

``` xml
<validate_tcp_client_information>true</validate_tcp_client_information>
```

## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}

Allows to ignore 'permission denied' errors when using multi-directory `{}` globs for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md) storages.
This setting is only applicable to multi-directory `{}` globs.

Possible values: `0`, `1`.

Default value: `0`.

### Example

Having the following structure in `user_files`:
```
my_directory/
├── data1
│   ├── f1.csv
├── data2
│   ├── f2.csv
└── test_root
```
where the `data1` and `data2` directories are accessible, but one has no rights to read the `test_root` directory.

For a query like `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` an exception will be thrown:
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
It happens because a multi-directory glob requires a recursive search in _all_ available directories under `my_directory`.

If this setting is on, all inaccessible directories will be silently skipped, even if they are explicitly specified inside `{}`.

```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;

Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
```
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;

┌─_path───────────────────┬─_file───────┐
│ <full path to file>     │ <file name> │
└─────────────────────────┴─────────────┘
```
@@ -3,12 +3,13 @@ slug: /en/operations/system-tables/information_schema
---
# INFORMATION_SCHEMA

`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.

The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
`INFORMATION_SCHEMA` (or: `information_schema`) is a system database which provides a (somewhat) standardized, [DBMS-agnostic view](https://en.wikipedia.org/wiki/Information_schema) on metadata of database objects. The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolve in a backwards-compatible way, i.e. only new functionality is added but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md).

``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;

-- or:
SHOW TABLES FROM information_schema;
```

``` text
@@ -17,6 +18,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
│ SCHEMATA │
│ TABLES   │
│ VIEWS    │
│ columns  │
│ schemata │
│ tables   │
│ views    │
└──────────┘
```

@@ -27,6 +32,8 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
- [TABLES](#tables)
- [VIEWS](#views)

Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases.
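For instance (a sketch; both statements should return the same result):

``` sql
-- The upper-case and lower-case views are equivalent:
SELECT table_name FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = currentDatabase() LIMIT 1;
SELECT table_name FROM information_schema.columns WHERE table_schema = currentDatabase() LIMIT 1;
```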
## COLUMNS {#columns}

Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table, as well as columns that are not supported in ClickHouse or do not make sense (always `NULL`) but must be present according to the standard.
@ -101,7 +101,8 @@ Columns:
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to an arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate function combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
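As a sketch (the filter and ordering are illustrative), these columns can be inspected for recently finished queries:

``` sql
SELECT query_id, length(thread_ids) AS total_threads, peak_threads_usage
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```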
64
docs/en/operations/system-tables/scheduler.md
Normal file
@ -0,0 +1,64 @@
---
slug: /en/operations/system-tables/scheduler
---
# scheduler

Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server.
This table can be used for monitoring. The table contains a row for every scheduling node.

Example:

``` sql
SELECT *
FROM system.scheduler
WHERE resource = 'network_read' AND path = '/prio/fair/prod'
FORMAT Vertical
```

``` text
Row 1:
──────
resource:          network_read
path:              /prio/fair/prod
type:              fifo
weight:            5
priority:          0
is_active:         0
active_children:   0
dequeued_requests: 67
dequeued_cost:     4692272
busy_periods:      63
vruntime:          938454.1999999989
system_vruntime:   ᴺᵁᴸᴸ
queue_length:      0
queue_cost:        0
budget:            -60524
is_satisfied:      ᴺᵁᴸᴸ
inflight_requests: ᴺᵁᴸᴸ
inflight_cost:     ᴺᵁᴸᴸ
max_requests:      ᴺᵁᴸᴸ
max_cost:          ᴺᵁᴸᴸ
```

Columns:

- `resource` (`String`) - Resource name
- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy
- `type` (`String`) - Type of a scheduling node.
- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair` type.
- `priority` (`Int64`) - Priority of a node, used by a parent node of `priority` type (lower value means higher priority).
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by the SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue.
- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue.
- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can deviate from zero in case of a discrepancy between the estimated and real costs of resource requests (e.g. after a read/write failure).
- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraints of this node are satisfied.
- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node that are currently in the consumption state.
- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node that are currently in the consumption state.
- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
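As a monitoring sketch (the ordering and filter are illustrative), the following finds queues that currently hold requests:

``` sql
SELECT resource, path, queue_length, queue_cost
FROM system.scheduler
WHERE type = 'fifo' AND queue_length > 0
ORDER BY queue_cost DESC
```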
@ -16,6 +16,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--log-level=LEVEL` — Set log level. Default value: `information`.
- `--no-confirmation` — If set, will not require a confirmation on several commands. Default value: `false` for interactive mode and `true` for query mode.
- `--help` — Shows the help message.

## Example {#clickhouse-keeper-client-example}

@ -43,12 +45,13 @@ keeper foo bar
## Commands {#clickhouse-keeper-client-commands}

- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `cd [path]` -- Changes the working path (default `.`)
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
- `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
- `create <path> <value> [mode]` -- Creates new node with the set value
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
- `get <path>` -- Returns the node's value
- `remove <path>` -- Removes the node
- `rm <path> [version]` -- Removes the node only if version matches (default: -1)
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
@ -56,3 +59,5 @@ keeper foo bar
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
- `sync <path>` -- Synchronizes node between processes and leader
- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration
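For example, a short session using the commands above (node paths and values are purely illustrative) might look like:

``` bash
ls /
create /test "some value"
get /test
find_super_nodes 100 /
rmr /test
```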
153
docs/en/operations/workload-scheduling.md
Normal file
@ -0,0 +1,153 @@
---
slug: /en/operations/workload-scheduling
sidebar_position: 69
sidebar_label: "Workload scheduling"
title: "Workload scheduling"
---

When ClickHouse executes multiple queries simultaneously, they may use shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource a scheduling hierarchy can be configured. The hierarchy root represents a resource, while leaves are queues holding requests that exceed resource capacity.

:::note
Currently only remote disk IO can be scheduled using the described method. For CPU scheduling see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits see [Memory overcommit](settings/memory-overcommit.md).
:::

## Disk configuration {#disk-config}

To enable IO scheduling for a specific disk, you have to specify `read_resource` and/or `write_resource` in the storage configuration. This tells ClickHouse which resource should be used for every read and write request on the given disk. A read and a write resource can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks can also refer to the same resource, which is useful for remote disks: for example, if you want to allow fair division of network bandwidth between "production" and "development" workloads.

Example:
```xml
<clickhouse>
    <storage_configuration>
        ...
        <disks>
            <s3>
                <type>s3</type>
                <endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
                <access_key_id>your_access_key_id</access_key_id>
                <secret_access_key>your_secret_access_key</secret_access_key>
                <read_resource>network_read</read_resource>
                <write_resource>network_write</write_resource>
            </s3>
        </disks>
        <policies>
            <s3_main>
                <volumes>
                    <main>
                        <disk>s3</disk>
                    </main>
                </volumes>
            </s3_main>
        </policies>
    </storage_configuration>
</clickhouse>
```

## Workload markup {#workload_markup}

Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, then the value "default" is used. Note that you can specify another value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting (see the sketch after the example below).

Let's consider an example of a system with two different workloads: "production" and "development".

```sql
SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production'
SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development'
```
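A minimal sketch of pinning `workload` via a settings profile with a constraint (the profile name is hypothetical; the `const` constraint is the syntax documented for settings constraints):

```xml
<clickhouse>
    <profiles>
        <production_profile>
            <!-- Every query from users with this profile is marked as 'production' -->
            <workload>production</workload>
            <constraints>
                <workload>
                    <!-- Forbid changing the value in a query's SETTINGS clause -->
                    <const/>
                </workload>
            </constraints>
        </production_profile>
    </profiles>
</clickhouse>
```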
## Resource scheduling hierarchy {#hierarchy}

From the standpoint of the scheduling subsystem, a resource represents a hierarchy of scheduling nodes.

```mermaid
graph TD
    subgraph network_read
    nr_root(("/"))
    -->|100 concurrent requests| nr_fair("fair")
    -->|75% bandwidth| nr_prod["prod"]
    nr_fair
    -->|25% bandwidth| nr_dev["dev"]
    end

    subgraph network_write
    nw_root(("/"))
    -->|100 concurrent requests| nw_fair("fair")
    -->|75% bandwidth| nw_prod["prod"]
    nw_fair
    -->|25% bandwidth| nw_dev["dev"]
    end
```

**Possible node types:**
* `inflight_limit` (constraint) - blocks if either the number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.

The following example shows how to define the IO scheduling hierarchies shown in the picture:

```xml
<clickhouse>
    <resources>
        <network_read>
            <node path="/">
                <type>inflight_limit</type>
                <max_requests>100</max_requests>
            </node>
            <node path="/fair">
                <type>fair</type>
            </node>
            <node path="/fair/prod">
                <type>fifo</type>
                <weight>3</weight>
            </node>
            <node path="/fair/dev">
                <type>fifo</type>
            </node>
        </network_read>
        <network_write>
            <node path="/">
                <type>inflight_limit</type>
                <max_requests>100</max_requests>
            </node>
            <node path="/fair">
                <type>fair</type>
            </node>
            <node path="/fair/prod">
                <type>fifo</type>
                <weight>3</weight>
            </node>
            <node path="/fair/dev">
                <type>fifo</type>
            </node>
        </network_write>
    </resources>
</clickhouse>
```

## Workload classifiers {#workload_classifiers}

Workload classifiers are used to define the mapping from the `workload` specified by a query into the leaf queues that should be used for specific resources. At the moment, workload classification is simple: only static mapping is available.

Example:
```xml
<clickhouse>
    <workload_classifiers>
        <production>
            <network_read>/fair/prod</network_read>
            <network_write>/fair/prod</network_write>
        </production>
        <development>
            <network_read>/fair/dev</network_read>
            <network_write>/fair/dev</network_write>
        </development>
        <default>
            <network_read>/fair/dev</network_read>
            <network_write>/fair/dev</network_write>
        </default>
    </workload_classifiers>
</clickhouse>
```

## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)
@ -12,7 +12,7 @@ Values can be added to the array in any (indeterminate) order.

The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`.

In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`, if the subquery result is small enough.

**Example**

@ -10,7 +10,7 @@ Syntax: `groupArrayLast(max_size)(x)`
Creates an array of the last argument values.
For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.

In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`, if the subquery result is small enough.

**Example**
@ -725,6 +725,42 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┴───────────────┘
```

## toDaysSinceYearZero

Returns, for a given date, the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function.

**Syntax**

``` sql
toDaysSinceYearZero(date)
```

Aliases: `TO_DAYS`

**Arguments**

- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md).

**Returned value**

The number of days passed since date 0000-01-01.

Type: [UInt32](../../sql-reference/data-types/int-uint.md).

**Example**

``` sql
SELECT toDaysSinceYearZero(toDate('2023-09-08'));
```

Result:

``` text
┌─toDaysSinceYearZero(toDate('2023-09-08'))─┐
│                                    713569 │
└───────────────────────────────────────────┘
```

## age

Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
@ -947,6 +983,8 @@ Result:

Adds the time interval or date interval to the provided date or date with time.

If the addition results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
@ -970,13 +1008,13 @@ Aliases: `dateAdd`, `DATE_ADD`.
- `year`

- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Returned value**

Date or date with time obtained by adding `value`, expressed in `unit`, to `date`.

Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

@ -992,10 +1030,16 @@ Result:
└───────────────────────────────────────────────┘
```

**See Also**

- [addDate](#addDate)

## date\_sub

Subtracts the time interval or date interval from the provided date or date with time.

If the subtraction results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
@ -1020,13 +1064,13 @@ Aliases: `dateSub`, `DATE_SUB`.
- `year`

- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Returned value**

Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.

Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

@ -1042,10 +1086,15 @@ Result:
└────────────────────────────────────────────────┘
```

**See Also**
- [subDate](#subDate)

## timestamp\_add

Adds the specified time value to the provided date or date time value.

If the addition results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
@ -1056,7 +1105,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.

**Arguments**

- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
    Possible values:
@ -1074,7 +1123,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.

Date or date with time with the specified `value` expressed in `unit` added to `date`.

Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

@ -1094,6 +1143,8 @@ Result:

Subtracts the time interval from the provided date or date with time.

If the subtraction results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
@ -1117,13 +1168,13 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
- `year`

- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Returned value**

Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.

Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**
@ -1139,6 +1190,90 @@ Result:
└──────────────────────────────────────────────────────────────┘
```

## addDate

Adds the time interval or date interval to the provided date or date with time.

If the addition results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
addDate(date, interval)
```

**Arguments**

- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md).

**Returned value**

Date or date with time obtained by adding `interval` to `date`.

Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

```sql
SELECT addDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```

Result:

```text
┌─addDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│                                       2021-01-01 │
└──────────────────────────────────────────────────┘
```

Alias: `ADDDATE`

**See Also**
- [date_add](#date_add)

## subDate

Subtracts the time interval or date interval from the provided date or date with time.

If the subtraction results in a value outside the bounds of the data type, the result is undefined.

**Syntax**

``` sql
subDate(date, interval)
```

**Arguments**

- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md).

**Returned value**

Date or date with time obtained by subtracting `interval` from `date`.

Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

```sql
SELECT subDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```

Result:

```text
┌─subDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│                                       2015-01-01 │
└──────────────────────────────────────────────────┘
```

Alias: `SUBDATE`

**See Also**
- [date_sub](#date_sub)

## now

Returns the current date and time at the moment of query analysis. The function is a constant expression.
@ -1290,6 +1425,8 @@ Rounds the time to the half hour.

Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

This function is the opposite of function `YYYYMMDDToDate()`.

**Example**

``` sql
@ -1312,8 +1449,7 @@ Converts a date or date with time to a UInt32 number containing the year and mon
**Example**

```sql
SELECT
    toYYYYMMDD(now(), 'US/Eastern')
SELECT toYYYYMMDD(now(), 'US/Eastern')
```

Result:

@ -1331,8 +1467,7 @@ Converts a date or date with time to a UInt64 number containing the year and mon
**Example**

```sql
SELECT
    toYYYYMMDDhhmmss(now(), 'US/Eastern')
SELECT toYYYYMMDDhhmmss(now(), 'US/Eastern')
```

Result:

@ -1343,6 +1478,93 @@ Result:
└───────────────────────────────────────┘
```
## YYYYMMDDToDate

Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md).

This function is the opposite of function `toYYYYMMDD()`.

The output is undefined if the input does not encode a valid Date value.

**Syntax**

```sql
YYYYMMDDToDate(yyyymmdd);
```

**Arguments**

- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).

**Returned value**

- A date created from the arguments.

Type: [Date](../../sql-reference/data-types/date.md).

**Example**

```sql
SELECT YYYYMMDDToDate(20230911);
```

Result:

```response
┌─YYYYMMDDToDate(20230911)─┐
│               2023-09-11 │
└──────────────────────────┘
```

## YYYYMMDDToDate32

Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md).

## YYYYMMDDhhmmssToDateTime

Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md).

The output is undefined if the input does not encode a valid DateTime value.

This function is the opposite of function `toYYYYMMDDhhmmss()`.

**Syntax**

```sql
YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]);
```

**Arguments**

- `yyyymmddhhmmss` - A number representing the year, month, day, hours, minutes and seconds. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).

**Returned value**

- A date with time created from the arguments.

Type: [DateTime](../../sql-reference/data-types/datetime.md).

**Example**

```sql
SELECT YYYYMMDDhhmmssToDateTime(20230911131415);
```

Result:

```response
┌──────YYYYMMDDhhmmssToDateTime(20230911131415)─┐
│                           2023-09-11 13:14:15 │
└───────────────────────────────────────────────┘
```

## YYYYMMDDhhmmssToDateTime64

Like function `YYYYMMDDhhmmssToDateTime()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).

Accepts an additional, optional `precision` parameter after the `timezone` parameter.
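As a sketch of the call shape (values are illustrative; per the note above, the optional `precision` follows `timezone`):

```sql
SELECT YYYYMMDDhhmmssToDateTime64(20230911131415, 'UTC', 3);
```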
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters

The function adds a Date/DateTime interval to a Date/DateTime and then returns the Date/DateTime. For example:

@ -1635,7 +1857,7 @@ monthName(date)

**Arguments**

- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Returned value**
@ -90,7 +90,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```

The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:

```bash
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
@ -208,7 +208,7 @@ The optional keyword `FULL` causes the output to include the collation, comment
The statement produces a result table with the following structure:
- field - The name of the column (String)
- type - The column data type (String)
- null - If the column data type is Nullable (UInt8)
- null - `YES` if the column data type is Nullable, `NO` otherwise (String)
- key - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String)
- default - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String))
- extra - Additional information, currently unused (String)

@ -638,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables
**See Also**

- [system.table_engines](../../operations/system-tables/table_engines.md) table

## SHOW FUNCTIONS

``` sql
SHOW FUNCTIONS [LIKE | ILIKE '<pattern>']
```

Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table.

If either the `LIKE` or `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided `<pattern>`.
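For example, a sketch listing date-related functions (the pattern is illustrative):

``` sql
SHOW FUNCTIONS LIKE 'toDate%'
```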
**See Also**

- [system.functions](../../operations/system-tables/functions.md) table
@ -135,13 +135,13 @@ Getting data from table in table.csv, located in archive1.zip or/and archive2.zi
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
```

## Globs in Path
## Globs in Path {#globs_in_path}

Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only its suffix or prefix).

- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of the strings contains `/`, `'permission denied'` errors may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting for File & HDFS.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
- `**` - Fetches all files inside the folder recursively.
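For example, a sketch combining a multi-directory glob with the setting mentioned above (directory and file names are illustrative):

``` sql
SELECT count() FROM file('my_dir/{data1/f1,data2/f2}.csv', 'CSV')
SETTINGS ignore_access_denied_multidirectory_globs = 1;
```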
@ -210,7 +210,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.

- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.

**See Also**
@ -39,13 +39,13 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```

**Globs in path**
## Globs in path {#globs_in_path}

Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only its suffix or prefix).

- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of the strings contains `/`, `'permission denied'` errors may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
- `{N..M}` — Substitutes any number in range from N to M including both borders.

Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md).
@ -102,6 +102,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.

**See Also**
@ -805,8 +805,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
<single_read_retries>4</single_read_retries>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>
@ -832,8 +830,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
- `single_read_retries` — the number of retry attempts if an error occurs during a read. Default value: `4`.
- `min_bytes_for_seek` — the minimum number of bytes for which a seek operation is used instead of sequential reads. Default value: 1 MB.
- `metadata_path` — the path to the local file storage that keeps metadata files for S3. Default value: `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` — whether caching of marks and index files on the local filesystem is allowed. Default value: `true`.
- `cache_path` — the path on the local filesystem where caches of marks and index files are stored. Default value: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — whether to perform an access check when the disk starts. If set to `true`, the check is not performed. Default value: `false`.

An S3 disk can be configured as `main` or `cold` storage:
@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}

1
```
@ -267,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
You can track the progress of a query with the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). An example header sequence:

``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
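A sketch of requesting these headers from the command line (URL and query are illustrative):

``` bash
curl -v 'http://localhost:8123/?send_progress_in_http_headers=1' \
    -d 'SELECT sum(number) FROM system.numbers LIMIT 100000000'
```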
Possible header fields:
@ -530,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -570,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -622,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -641,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact
@ -4209,3 +4209,45 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
│ 1.7091 │ 15008753 │
└─────────────────────┴──────────────────────────┘
```

## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}

Allows ignoring the 'permission denied' error that occurs when a `{}` pattern containing `/` is used.
Works for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md).
Works _only_ for the `{}` patterns described above.

Possible values: `0`, `1`.

Default value: `0`.

### Example

Suppose `user_files` has the following structure:
```
my_directory/
├── data1
│   ├── f1.csv
├── data2
│   ├── f2.csv
└── test_root
```
Suppose also that the directories `data1` and `data2` are readable, but there is no permission to read `test_root`.

The query `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` throws an exception:
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
This happens because processing such a pattern requires a recursive search through _all_ directories inside `my_directory`.

If this setting is set to 1, inaccessible directories are silently skipped, even when they are explicitly listed inside `{}`.

```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;

Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
```
```sql
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;

┌─_path───────────────────┬─_file───────┐
│ <full path to file>     │ <file name> │
└─────────────────────────┴─────────────┘
```
@ -99,7 +99,8 @@ ClickHouse does not delete data from the table automatically
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — counters for changes of various metrics. Their descriptions can be found in the table [system.events](#system_tables-events).
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — names of settings that were changed when the client ran the query. To enable logging of setting changes, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — a comment for the log record. It is an arbitrary string whose length must not exceed [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if no comment was given.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — ids of the threads participating in query processing.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — ids of the threads participating in query processing; these threads may not have run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — maximum count of simultaneously working threads participating in query processing.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `aggregate functions` used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `aggregate function combinators` used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `database engines` used during query execution.
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U

- `*` — substitutes any number of any characters except `/`, including the empty string.
- `?` — substitutes exactly one arbitrary character.
- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`; a string may contain `/`.
- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. If any of the strings contains `/`, 'permission denied' errors for existing but inaccessible directories/files may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
- `{N..M}` — substitutes any number in the range from `N` to `M` inclusive (may contain leading zeros).

The `{}` construction is similar to the [remote](remote.md) table function.
@ -124,6 +124,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
- `_path` — path to the file.
- `_file` — file name.

**See Also**

- [Virtual columns](index.md#table_engines-virtual_columns)
@ -39,11 +39,11 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```

**Globs in path**
## Globs in path components {#globs-in-path}

- `*` — substitutes any number of any characters except `/`, including the empty string.
- `?` — substitutes exactly one arbitrary character.
- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`.
- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. If any of the strings contains `/`, 'permission denied' errors for existing but inaccessible directories/files may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
- `{N..M}` — substitutes any number in the range from `N` to `M` inclusive (may contain leading zeros).

The `{}` construction is similar to the [remote](remote.md) table function.
@ -62,3 +62,5 @@ LIMIT 2
**See Also**

- [Virtual columns](index.md#table_engines-virtual_columns)
- The [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting
@ -745,8 +745,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
<single_read_retries>4</single_read_retries>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>
@ -772,8 +770,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
- `single_read_retries` - the number of retries after a connection is lost during a read. Default value: 4.
- `min_bytes_for_seek` - the minimum number of bytes for which a seek operation is used instead of sequential reads. Default value: 1000.
- `metadata_path` - the local path for storing S3 metadata files. Default value: `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` - whether caching of marks and index files is allowed. Default value: `true`.
- `cache_path` - the local path for caching marks and index files. Default value: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` - if `true`, ClickHouse does not check whether the disk is available at startup. Default value: `false`.
- `server_side_encryption_customer_key_base64` - if specified, requests include the headers required to access SSE-C encrypted data.

@ -823,4 +819,3 @@ An S3 disk can also be configured with hot/cold storage:
- `_part_uuid` - unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
- `_partition_value` — value (tuple) of the `partition by` expression.
- `_sample_factor` - sample factor (from the query).
@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}

1
```
@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
You can receive query progress information in the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example:

``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```

Displayed header fields:
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
< X-ClickHouse-Format: Template
|
||||
< X-ClickHouse-Timezone: Asia/Shanghai
|
||||
< Keep-Alive: timeout=3
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
|
||||
<
|
||||
# HELP "Query" "Number of executing queries"
|
||||
# TYPE "Query" counter
|
||||
@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=3
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
Say Hi!%
|
||||
@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
|
||||
< Content-Type: text/plain; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=3
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
|
||||
@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=3
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
|
||||
<
|
||||
<html><body>Absolute Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=3
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
|
||||
<
|
||||
<html><body>Relative Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
|
@ -5,8 +5,8 @@ sidebar_position: 31
|
||||
|
||||
# stddevSamp {#stddevsamp}
|
||||
|
||||
结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。
|
||||
结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) 的平方根。
|
||||
|
||||
:::note
|
||||
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
|
||||
:::
|
||||
:::
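
For reference, the relationship this doc describes can be written out explicitly (standard sample standard deviation, not part of the diff):

$$
\operatorname{stddevSamp}(x) = \sqrt{\operatorname{varSamp}(x)} = \sqrt{\frac{1}{n-1}\sum_{i=1}^{n}\left(x_i-\bar{x}\right)^{2}}, \qquad \bar{x}=\frac{1}{n}\sum_{i=1}^{n}x_i
$$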

@ -32,10 +32,10 @@ contents:
  dst: /usr/bin/clickhouse-keeper
- src: clickhouse-keeper.service
  dst: /lib/systemd/system/clickhouse-keeper.service
- src: clickhouse
- src: clickhouse-keeper
  dst: /usr/bin/clickhouse-keeper-client
  type: symlink
- src: clickhouse
- src: clickhouse-keeper
  dst: /usr/bin/clickhouse-keeper-converter
  type: symlink
# docs

@ -1209,8 +1209,6 @@
            <single_read_retries>4</single_read_retries>
            <min_bytes_for_seek>1000</min_bytes_for_seek>
            <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
            <cache_enabled>true</cache_enabled>
            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </s3>
    </disks>

@ -18,7 +18,14 @@
#include <Common/Exception.h>
#include <Common/parseGlobs.h>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

static void setupLogging(const std::string & log_level)
{

@ -9,8 +9,6 @@
#include <thread>
#include <filesystem>

#include <re2/re2.h>

#include <boost/program_options.hpp>

#include <Common/TerminalSize.h>

@ -26,6 +24,14 @@
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptor.h>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

static constexpr auto documentation = R"(
A tool to extract information from Git repository for analytics.

@ -9,11 +9,11 @@ namespace DB

bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return true;

    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));
    return true;
}

@ -42,11 +42,11 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con

bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return true;

    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));
    return true;
}

@ -64,11 +64,12 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con

bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));

    String arg;
    if (!parseKeeperArg(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

@ -93,11 +94,12 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co

bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));

    String arg;
    if (!parseKeeperArg(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

@ -143,10 +145,10 @@ void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client)

bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));

    return true;
}

@ -156,13 +158,28 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
    std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}

bool ExistsCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(path));

    return true;
}

void ExistsCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
    std::cout << client->zookeeper->exists(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}

bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return true;

    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));
    return true;
}

@ -325,25 +342,33 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client)

bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));

    ASTPtr version;
    if (ParserNumber{}.parse(pos, version, expected))
        node->args.push_back(version->as<ASTLiteral &>().value);

    return true;
}

void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()));
    Int32 version{-1};
    if (query->args.size() == 2)
        version = static_cast<Int32>(query->args[1].get<Int32>());

    client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
}
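
A hedged usage sketch of the new optional version argument (an illustrative keeper-client session; the path and version are made up):

```bash
$ clickhouse-keeper-client
/ :) rm /test/node 3
/ :) rm /test/node
```

The first call succeeds only if the node's current version is 3; the second passes the default version of -1, i.e. an unconditional remove.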

bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(arg));
    node->args.push_back(std::move(path));

    return true;
}

@ -351,8 +376,72 @@ bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & nod
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    String path = client->getAbsolutePath(query->args[0].safeGet<String>());
    client->askConfirmation("You are going to recursively delete path " + path,
        [client, path]{ client->zookeeper->removeRecursive(path); });
    client->askConfirmation(
        "You are going to recursively delete path " + path, [client, path] { client->zookeeper->removeRecursive(path); });
}

bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
    ReconfigCommand::Operation operation;
    if (ParserKeyword{"ADD"}.ignore(pos, expected))
        operation = ReconfigCommand::Operation::ADD;
    else if (ParserKeyword{"REMOVE"}.ignore(pos, expected))
        operation = ReconfigCommand::Operation::REMOVE;
    else if (ParserKeyword{"SET"}.ignore(pos, expected))
        operation = ReconfigCommand::Operation::SET;
    else
        return false;

    node->args.push_back(operation);
    ParserToken{TokenType::Whitespace}.ignore(pos);

    String arg;
    if (!parseKeeperArg(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

    return true;
}

void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
    String joining;
    String leaving;
    String new_members;

    auto operation = query->args[0].get<ReconfigCommand::Operation>();
    switch (operation)
    {
        case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
            joining = query->args[1].safeGet<DB::String>();
            break;
        case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
            leaving = query->args[1].safeGet<DB::String>();
            break;
        case static_cast<UInt8>(ReconfigCommand::Operation::SET):
            new_members = query->args[1].safeGet<DB::String>();
            break;
        default:
            UNREACHABLE();
    }

    auto response = client->zookeeper->reconfig(joining, leaving, new_members);
    std::cout << response.value << '\n';
}
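
An illustrative session for the new command (the server specification string is hypothetical; the exact format is described in the Keeper reconfiguration guide linked from the help message):

```bash
$ clickhouse-keeper-client
/ :) reconfig add "server.3=localhost:9234"
/ :) reconfig remove "3"
```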

bool SyncCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(path));

    return true;
}

void SyncCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
    std::cout << client->zookeeper->sync(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}

bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const

@ -51,7 +51,7 @@ class CDCommand : public IKeeperClientCommand

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
    String getHelpMessage() const override { return "{} [path] -- Changes the working path (default `.`)"; }
};

class SetCommand : public IKeeperClientCommand

@ -64,7 +64,7 @@ class SetCommand : public IKeeperClientCommand

    String getHelpMessage() const override
    {
        return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
        return "{} <path> <value> [version] -- Updates the node's value. Only updates if version matches (default: -1)";
    }
};

@ -101,6 +101,17 @@ class GetCommand : public IKeeperClientCommand
    String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};

class ExistsCommand : public IKeeperClientCommand
{
    String getName() const override { return "exists"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override { return "{} <path> -- Returns `1` if node exists, `0` otherwise"; }
};

class GetStatCommand : public IKeeperClientCommand
{
    String getName() const override { return "get_stat"; }

@ -154,7 +165,6 @@ class FindBigFamily : public IKeeperClientCommand
    }
};

class RMCommand : public IKeeperClientCommand
{
    String getName() const override { return "rm"; }

@ -163,7 +173,7 @@ class RMCommand : public IKeeperClientCommand

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
    String getHelpMessage() const override { return "{} <path> [version] -- Removes the node only if version matches (default: -1)"; }
};

class RMRCommand : public IKeeperClientCommand

@ -177,6 +187,35 @@ class RMRCommand : public IKeeperClientCommand
    String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};

class ReconfigCommand : public IKeeperClientCommand
{
    enum class Operation : UInt8
    {
        ADD = 0,
        REMOVE = 1,
        SET = 2,
    };

    String getName() const override { return "reconfig"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override { return "{} <add|remove|set> \"<arg>\" [version] -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration"; }
};

class SyncCommand: public IKeeperClientCommand
{
    String getName() const override { return "sync"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override { return "{} <path> -- Synchronizes node between processes and leader"; }
};

class HelpCommand : public IKeeperClientCommand
{
    String getName() const override { return "help"; }

@ -84,8 +84,11 @@ std::vector<String> KeeperClient::getCompletions(const String & prefix) const

void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
    if (!ask_confirmation)
        return callback();

    std::cout << prompt << " Continue?\n";
    need_confirmation = true;
    waiting_confirmation = true;
    confirmation_callback = callback;
}

@ -170,6 +173,14 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
        Poco::Util::Option("log-level", "", "set log level")
            .argument("<level>")
            .binding("log-level"));

    options.addOption(
        Poco::Util::Option("no-confirmation", "", "if set, will not require a confirmation on several commands. default false for interactive and true for query")
            .binding("no-confirmation"));

    options.addOption(
        Poco::Util::Option("tests-mode", "", "run keeper-client in a special mode for tests. all commands output are separated by special symbols. default false")
            .binding("tests-mode"));
}

void KeeperClient::initialize(Poco::Util::Application & /* self */)

@ -184,12 +195,15 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
        std::make_shared<CreateCommand>(),
        std::make_shared<TouchCommand>(),
        std::make_shared<GetCommand>(),
        std::make_shared<ExistsCommand>(),
        std::make_shared<GetStatCommand>(),
        std::make_shared<FindSuperNodes>(),
        std::make_shared<DeleteStaleBackups>(),
        std::make_shared<FindBigFamily>(),
        std::make_shared<RMCommand>(),
        std::make_shared<RMRCommand>(),
        std::make_shared<ReconfigCommand>(),
        std::make_shared<SyncCommand>(),
        std::make_shared<HelpCommand>(),
        std::make_shared<FourLetterWordCommand>(),
    });

@ -229,18 +243,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
    EventNotifier::init();
}

void KeeperClient::executeQuery(const String & query)
{
    std::vector<String> queries;
    boost::algorithm::split(queries, query, boost::is_any_of(";"));

    for (const auto & query_text : queries)
    {
        if (!query_text.empty())
            processQueryText(query_text);
    }
}

bool KeeperClient::processQueryText(const String & text)
{
    if (exit_strings.find(text) != exit_strings.end())

@ -248,29 +250,44 @@ bool KeeperClient::processQueryText(const String & text)

    try
    {
        if (need_confirmation)
        if (waiting_confirmation)
        {
            need_confirmation = false;
            waiting_confirmation = false;
            if (text.size() == 1 && (text == "y" || text == "Y"))
                confirmation_callback();
            return true;
        }

        KeeperParser parser;
        String message;
        const char * begin = text.data();
        ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
        const char * end = begin + text.size();

        if (!res)
        while (begin < end)
        {
            std::cerr << message << "\n";
            return true;
            String message;
            ASTPtr res = tryParseQuery(
                parser,
                begin,
                end,
                /* out_error_message = */ message,
                /* hilite = */ true,
                /* description = */ "",
                /* allow_multi_statements = */ true,
                /* max_query_size = */ 0,
                /* max_parser_depth = */ 0,
                /* skip_insignificant = */ false);

            if (!res)
            {
                std::cerr << message << "\n";
                return true;
            }

            auto * query = res->as<ASTKeeperQuery>();

            auto command = KeeperClient::commands.find(query->command);
            command->second->execute(query, this);
        }

        auto * query = res->as<ASTKeeperQuery>();

        auto command = KeeperClient::commands.find(query->command);
        command->second->execute(query, this);
    }
    catch (Coordination::Exception & err)
    {

@ -279,7 +296,7 @@ bool KeeperClient::processQueryText(const String & text)
    return true;
}

void KeeperClient::runInteractive()
void KeeperClient::runInteractiveReplxx()
{

    LineReader::Patterns query_extenders = {"\\"};

@ -299,7 +316,7 @@ void KeeperClient::runInteractive()
    while (true)
    {
        String prompt;
        if (need_confirmation)
        if (waiting_confirmation)
            prompt = "[y/n] ";
        else
            prompt = cwd.string() + " :) ";

@ -313,6 +330,26 @@ void KeeperClient::runInteractive()
    }
}

void KeeperClient::runInteractiveInputStream()
{
    for (String input; std::getline(std::cin, input);)
    {
        if (!processQueryText(input))
            break;

        std::cout << "\a\a\a\a" << std::endl;
        std::cerr << std::flush;
    }
}

void KeeperClient::runInteractive()
{
    if (config().hasOption("tests-mode"))
        runInteractiveInputStream();
    else
        runInteractiveReplxx();
}

int KeeperClient::main(const std::vector<String> & /* args */)
{
    if (config().hasOption("help"))

@ -362,8 +399,13 @@ int KeeperClient::main(const std::vector<String> & /* args */)
    zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
    zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);

    if (config().has("no-confirmation") || config().has("query"))
        ask_confirmation = false;

    if (config().has("query"))
        executeQuery(config().getString("query"));
    {
        processQueryText(config().getString("query"));
    }
    else
        runInteractive();

@ -49,8 +49,10 @@ public:

protected:
    void runInteractive();
    void runInteractiveReplxx();
    void runInteractiveInputStream();

    bool processQueryText(const String & text);
    void executeQuery(const String & query);

    void loadCommands(std::vector<Command> && new_commands);

@ -61,7 +63,8 @@ protected:

    zkutil::ZooKeeperArgs zk_args;

    bool need_confirmation = false;
    bool ask_confirmation = true;
    bool waiting_confirmation = false;

    std::vector<String> registered_commands_and_four_letter_words;
};

@ -7,43 +7,32 @@ namespace DB

bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
    expected.add(pos, getTokenName(TokenType::BareWord));

    if (pos->type == TokenType::BareWord)
    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
    {
        result = String(pos->begin, pos->end);
        ++pos;
        ParserToken{TokenType::Whitespace}.ignore(pos);
        return true;
        if (!parseIdentifierOrStringLiteral(pos, expected, result))
            return false;
    }

    bool status = parseIdentifierOrStringLiteral(pos, expected, result);
    ParserToken{TokenType::Whitespace}.ignore(pos);
    return status;
}

bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
    expected.add(pos, "path");

    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
        return parseIdentifierOrStringLiteral(pos, expected, path);

    String result;
    while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream)
    while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
    {
        result.append(pos->begin, pos->end);
        ++pos;
    }

    ParserToken{TokenType::Whitespace}.ignore(pos);

    if (result.empty())
        return false;

    path = result;
    return true;
}

bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
    expected.add(pos, "path");
    return parseKeeperArg(pos, expected, path);
}

bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    auto query = std::make_shared<ASTKeeperQuery>();

@ -572,17 +572,14 @@ void LocalServer::processConfig()
    if (!queries.empty() && config().has("queries-file"))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");

    if (config().has("multiquery"))
        is_multiquery = true;

    delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
    if (is_interactive && !delayed_interactive)
    {
        if (config().has("multiquery"))
            is_multiquery = true;
    }
    else
    if (!is_interactive || delayed_interactive)
    {
        echo_queries = config().hasOption("echo") || config().hasOption("verbose");
        ignore_error = config().getBool("ignore-error", false);
        is_multiquery = true;
    }

    print_stack_trace = config().getBool("stacktrace", false);

@ -881,6 +878,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
        config().setBool("no-system-tables", true);
    if (options.count("only-system-tables"))
        config().setBool("only-system-tables", true);
    if (options.count("database"))
        config().setString("default_database", options["database"].as<std::string>());

    if (options.count("input-format"))
        config().setString("table-data-format", options["input-format"].as<std::string>());

@ -1393,7 +1393,7 @@ try
    const auto interserver_listen_hosts = getInterserverListenHosts(config());
    const auto listen_try = getListenTry(config());

    if (config().has("keeper_server"))
    if (config().has("keeper_server.server_id"))
    {
#if USE_NURAFT
        //// If we don't have configured connection probably someone trying to use clickhouse-server instead

@ -448,8 +448,6 @@
            <account_name>account</account_name>
            <account_key>pass123</account_key>
            <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
            <cache_enabled>true</cache_enabled>
            <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </blob_storage_disk>
    </disks>

@ -10,9 +10,17 @@
#include <Disks/IO/createReadBufferFromFileBase.h>

#include <boost/program_options.hpp>
#include <re2/re2.h>
#include <filesystem>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

namespace fs = std::filesystem;

#define EXTRACT_PATH_PATTERN ".*\\/store/(.*)"

@ -26,10 +26,17 @@
#include <IO/Operators.h>
#include <Poco/AccessExpireCache.h>
#include <boost/algorithm/string/join.hpp>
#include <re2/re2.h>
#include <filesystem>
#include <mutex>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

namespace DB
{

@ -1,11 +1,18 @@
#pragma once

#include <re2/re2.h>

#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

namespace DB
{

@ -1,12 +1,18 @@
#pragma once

#include <re2/re2.h>

#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ColumnTransformers.h>
#include <Parsers/ASTAsterisk.h>

#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif

namespace DB
{

@ -6341,9 +6341,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
    {
        /// For input function we should check if input format supports reading subset of columns.
        if (table_function_ptr->getName() == "input")
            use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
            use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat(), scope.context);
        else
            use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
            use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(scope.context);
    }

    if (use_columns_from_insert_query)

@ -1,195 +0,0 @@
#include "UniqToCountPass.h"

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>

#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>

namespace DB
{

namespace
{

bool matchFnUniq(String func_name)
{
    auto name = Poco::toLower(func_name);
    return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined"
        || name == "uniqCombined64";
}

/// Extract the corresponding projection columns for group by node list.
/// For example:
/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode)
NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node)
{
    if (!query_node->hasGroupBy())
        return {};

    NamesAndTypes result;
    for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren())
    {
        const auto & projection_columns = query_node->getProjectionColumns();
        const auto & projection_nodes = query_node->getProjection().getNodes();

        assert(projection_columns.size() == projection_nodes.size());

        for (size_t i = 0; i < projection_columns.size(); i++)
        {
            if (projection_nodes[i]->isEqual(*group_by_ele))
                result.push_back(projection_columns[i]);
        }
    }
    return result;
}

/// Whether query_columns equals subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
    if (query_columns.size() != subquery_columns.size())
        return false;

    for (const auto & query_column : query_columns)
    {
        auto find = std::find_if(
            subquery_columns.begin(),
            subquery_columns.end(),
            [&](const auto & subquery_column) -> bool
            {
                if (auto * column_node = query_column->as<ColumnNode>())
                {
                    return subquery_column == column_node->getColumn();
                }
                return false;
            });

        if (find == subquery_columns.end())
            return false;
    }
    return true;
}

/// Whether subquery_columns contains all columns in subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
    if (query_columns.size() > subquery_columns.size())
        return false;

    for (const auto & query_column : query_columns)
    {
        auto find = std::find_if(
            subquery_columns.begin(),
            subquery_columns.end(),
            [&](const auto & subquery_column) -> bool
            {
                if (auto * column_node = query_column->as<ColumnNode>())
                {
                    return subquery_column == column_node->getColumn();
                }
                return false;
            });

        if (find == subquery_columns.end())
            return false;
    }
    return true;
}

}

class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>;
    using Base::Base;

    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_uniq_to_count)
            return;

        auto * query_node = node->as<QueryNode>();
        if (!query_node)
            return;

        /// Check that query has only single table expression which is subquery
        auto * subquery_node = query_node->getJoinTree()->as<QueryNode>();
        if (!subquery_node)
            return;

        /// Check that query has only single node in projection
        auto & projection_nodes = query_node->getProjection().getNodes();
        if (projection_nodes.size() != 1)
            return;

        /// Check that projection_node is a function
        auto & projection_node = projection_nodes[0];
        auto * function_node = projection_node->as<FunctionNode>();
        if (!function_node)
            return;

        /// Check that query single projection node is `uniq` or its variants
        if (!matchFnUniq(function_node->getFunctionName()))
            return;

        auto & uniq_arguments_nodes = function_node->getArguments().getNodes();

        /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)'
        auto match_subquery_with_distinct = [&]() -> bool
        {
            if (!subquery_node->isDistinct())
                return false;

            /// uniq expression list == subquery projection columns
            if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
                return false;

            return true;
        };

        /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)'
        auto match_subquery_with_group_by = [&]() -> bool
        {
            if (!subquery_node->hasGroupBy())
                return false;

            /// uniq argument node list == subquery group by node list
            auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node);

            if (!nodeListEquals(uniq_arguments_nodes, group_by_columns))
                return false;

            /// subquery projection columns must contain all columns in uniq argument node list
            if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
                return false;

            return true;
        };

        /// Replace uniq of initial query to count
        if (match_subquery_with_distinct() || match_subquery_with_group_by())
        {
            AggregateFunctionProperties properties;
            auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);

            function_node->getArguments().getNodes().clear();
            function_node->resolveAsAggregateFunction(std::move(aggregate_function));
        }
    }
};

void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    UniqToCountVisitor visitor(context);
    visitor.visit(query_tree_node);
}

}

@ -1,30 +0,0 @@
#pragma once

#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery.
 * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to
 * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)'
 *
 * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to
 * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)'
 *
 * Note that we can rewrite all uniq variants except uniqUpTo.
 */
class UniqToCountPass final : public IQueryTreePass
{
public:
    String getName() override { return "UniqToCount"; }

    String getDescription() override
    {
        return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.";
    }

    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};

}

@ -18,7 +18,6 @@
#include <Analyzer/Utils.h>
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Passes/CountDistinctPass.h>
#include <Analyzer/Passes/UniqToCountPass.h>
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <Analyzer/Passes/SumIfToCountIfPass.h>

@ -248,7 +247,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
    manager.addPass(std::make_unique<ConvertLogicalExpressionToCNFPass>());

    manager.addPass(std::make_unique<CountDistinctPass>());
    manager.addPass(std::make_unique<UniqToCountPass>());
    manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
    manager.addPass(std::make_unique<SumIfToCountIfPass>());
    manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());

@ -316,7 +316,6 @@ target_link_libraries(clickhouse_common_io
    boost::context
    ch_contrib::cityhash
    ch_contrib::re2
    ch_contrib::re2_st
    ch_contrib::zlib
    pcg_random
    Poco::Foundation

@ -441,7 +441,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
    if (!block)
        return;

    processed_rows += block.rows();
    if (block.rows() == 0 && partial_result_mode == PartialResultMode::Active)
    {
        partial_result_mode = PartialResultMode::Inactive;
        if (is_interactive)
        {
            progress_indication.clearProgressOutput(*tty_buf);
            std::cout << "Full result:" << std::endl;
            progress_indication.writeProgress(*tty_buf);
        }
    }

    if (partial_result_mode == PartialResultMode::Inactive)
        processed_rows += block.rows();

    /// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
    initOutputFormat(block, parsed_query);

@ -451,13 +464,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
    if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
        return;

    if (!is_interactive && partial_result_mode == PartialResultMode::Active)
        return;

    /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
    if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout))
        progress_indication.clearProgressOutput(*tty_buf);

    try
    {
        output_format->write(materializeBlock(block));
        if (partial_result_mode == PartialResultMode::Active)
            output_format->writePartialResult(materializeBlock(block));
        else
            output_format->write(materializeBlock(block));

        written_first_block = true;
    }
    catch (const Exception &)

@ -521,6 +541,9 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
    if (partial_result_mode == PartialResultMode::NotInit)
        partial_result_mode = PartialResultMode::Active;

    if (!output_format)
    {
        /// Ignore all results when fuzzing as they can be huge.

@ -931,6 +954,14 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa

    const auto & settings = global_context->getSettingsRef();
    const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
    bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0;

    if (has_partial_result_setting)
    {
        partial_result_mode = PartialResultMode::NotInit;
        if (is_interactive)
            std::cout << "Partial result:" << std::endl;
    }

    int retries_left = 10;
    while (retries_left)

@ -1040,7 +1071,9 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
    }
    catch (const LocalFormatError &)
    {
        local_format_error = std::current_exception();
        /// Remember the first exception.
        if (!local_format_error)
            local_format_error = std::current_exception();
        connection->sendCancel();
    }
}

@ -1736,6 +1769,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
    }

    processed_rows = 0;
    partial_result_mode = PartialResultMode::Inactive;
    written_first_block = false;
    progress_indication.resetProgress();
    profile_events.watch.restart();

@ -272,6 +272,21 @@ protected:
    size_t processed_rows = 0; /// How many rows have been read or written.
    bool print_num_processed_rows = false; /// Whether to print the number of processed rows at

    enum class PartialResultMode: UInt8
    {
        /// Query doesn't show partial result before the first block with 0 rows.
        /// The first block with 0 rows initializes the output table format using its header.
        NotInit,

        /// Query shows partial result after the first and before the second block with 0 rows.
        /// The second block with 0 rows indicates that receiving blocks with partial result has been completed and next blocks will be with the full result.
        Active,

        /// Query doesn't show partial result at all.
        Inactive,
    };
    PartialResultMode partial_result_mode = PartialResultMode::Inactive;

    bool print_stack_trace = false;
    /// The last exception that was received from the server. Is used for the
    /// return code in batch mode.
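
A hedged sketch of how this mode is driven from the client side (illustrative invocation; it relies only on the `partial_result_update_duration_ms` setting that appears in this diff, passed as a client option):

```bash
$ clickhouse-client --partial_result_update_duration_ms=100 \
    --query 'SELECT number FROM numbers_mt(1000000000) ORDER BY -number LIMIT 10'
```

In interactive mode the output is bracketed by the "Partial result:" and "Full result:" markers printed by the code above.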
|
||||
|
@ -73,7 +73,7 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu
|
||||
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_)
|
||||
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, std::optional<size_t> version_)
|
||||
{
|
||||
func = func_;
|
||||
version = version_;
|
||||
|
@ -103,7 +103,7 @@ private:
|
||||
public:
|
||||
~ColumnAggregateFunction() override;
|
||||
|
||||
void set(const AggregateFunctionPtr & func_, size_t version_);
|
||||
void set(const AggregateFunctionPtr & func_, std::optional<size_t> version_ = std::nullopt);
|
||||
|
||||
AggregateFunctionPtr getAggregateFunction() { return func; }
|
||||
AggregateFunctionPtr getAggregateFunction() const { return func; }
|
||||
|
@ -80,7 +80,7 @@ StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, cha
|
||||
res.data = pos;
|
||||
}
|
||||
memcpy(pos, &data[n], sizeof(T));
|
||||
return StringRef(pos, sizeof(T));
|
||||
return res;
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
|
@ -670,9 +670,8 @@ UInt128 ColumnUnique<ColumnType>::IncrementalHash::getHash(const ColumnType & co
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
column.updateHashWithValue(i, sip_hash);
|
||||
|
||||
hash = sip_hash.get128();
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
hash = sip_hash.get128();
|
||||
cur_hash = hash;
|
||||
num_added_rows.store(column_size);
|
||||
}
|
||||
|
@ -433,7 +433,7 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
|
||||
template <typename T>
|
||||
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
{
|
||||
auto res = this->create();
|
||||
auto res = this->create(size);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
|
@ -397,6 +397,13 @@ public:
|
||||
/// It affects performance only (not correctness).
|
||||
virtual void reserve(size_t /*n*/) {}
|
||||
|
||||
/// Requests the removal of unused capacity.
|
||||
/// It is a non-binding request to reduce the capacity of the underlying container to its size.
|
||||
virtual MutablePtr shrinkToFit() const
|
||||
{
|
||||
return cloneResized(size());
|
||||
}
|
||||
|
||||
/// If we have another column as a source (owner of data), copy all data to ourself and reset source.
|
||||
virtual void ensureOwnership() {}
|
||||
|
||||
|
@ -1,24 +1,8 @@
|
||||
#include "Allocator.h"
|
||||
|
||||
/** Keep definition of this constant in cpp file; otherwise its value
|
||||
* is inlined into allocator code making it impossible to override it
|
||||
* in third-party code.
|
||||
*
|
||||
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
|
||||
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
|
||||
#else
|
||||
/**
|
||||
* In debug build, use small mmap threshold to reproduce more memory
|
||||
* stomping bugs. Along with ASLR it will hopefully detect more issues than
|
||||
* ASan. The program may fail due to the limit on number of memory mappings.
|
||||
*
|
||||
* Not too small to avoid too quick exhaust of memory mappings.
|
||||
*/
|
||||
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
|
||||
#endif
|
||||
|
||||
/// Constant is chosen almost arbitrarily, what I observed is 128KB is too small, 1MB is almost indistinguishable from 64MB and 1GB is too large.
|
||||
extern const size_t POPULATE_THRESHOLD = 16 * 1024 * 1024;
|
||||
|
||||
template class Allocator<false, false>;
|
||||
template class Allocator<true, false>;
|
||||
|
@ -20,12 +20,6 @@
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#if defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
|
||||
/// Thread and memory sanitizers do not intercept mremap. The usage of
|
||||
/// mremap will lead to false positives.
|
||||
#define DISABLE_MREMAP 1
|
||||
#endif
|
||||
#include <base/mremap.h>
|
||||
#include <base/getPageSize.h>
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
@ -35,52 +29,33 @@
|
||||
|
||||
#include <Common/Allocator_fwd.h>
|
||||
|
||||
#include <base/errnoToString.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Many modern allocators (for example, tcmalloc) do not do a mremap for
|
||||
* realloc, even in case of large enough chunks of memory. Although this allows
|
||||
* you to increase performance and reduce memory consumption during realloc.
|
||||
* To fix this, we do mremap manually if the chunk of memory is large enough.
|
||||
* The threshold (64 MB) is chosen quite large, since changing the address
|
||||
* space is very slow, especially in the case of a large number of threads. We
|
||||
* expect that the set of operations mmap/something to do/mremap can only be
|
||||
* performed about 1000 times per second.
|
||||
*
|
||||
* P.S. This is also required, because tcmalloc can not allocate a chunk of
|
||||
* memory greater than 16 GB.
|
||||
*
|
||||
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
|
||||
* to override it during linkage when using ClickHouse as a library in
|
||||
* third-party applications which may already use own allocator doing mmaps
|
||||
* in the implementation of alloc/realloc.
|
||||
*/
|
||||
extern const size_t MMAP_THRESHOLD;
|
||||
extern const size_t POPULATE_THRESHOLD;
|
||||
|
||||
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric MMappedAllocs;
|
||||
extern const Metric MMappedAllocBytes;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_MUNMAP;
|
||||
extern const int CANNOT_MREMAP;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
|
||||
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
|
||||
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
|
||||
* Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries.
|
||||
* That is why we don't do manual mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
|
||||
*/
|
||||
|
||||
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
|
||||
* Also used in hash tables.
|
||||
* The interface is different from std::allocator
|
||||
@ -88,10 +63,8 @@ namespace ErrorCodes
|
||||
* - passing the size into the `free` method;
|
||||
* - by the presence of the `alignment` argument;
|
||||
* - the possibility of zeroing memory (used in hash tables);
|
||||
* - random hint address for mmap
|
||||
* - mmap_threshold for using mmap less or more
|
||||
*/
|
||||
template <bool clear_memory_, bool mmap_populate>
|
||||
template <bool clear_memory_, bool populate>
|
||||
class Allocator
|
||||
{
|
||||
public:
|
||||
@ -111,7 +84,7 @@ public:
|
||||
try
|
||||
{
|
||||
checkSize(size);
|
||||
freeNoTrack(buf, size);
|
||||
freeNoTrack(buf);
|
||||
auto trace = CurrentMemoryTracker::free(size);
|
||||
trace.onFree(buf, size);
|
||||
}
|
||||
@ -135,8 +108,7 @@ public:
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
}
|
||||
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
|
||||
&& alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
else if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
@ -145,7 +117,10 @@ public:
|
||||
|
||||
void * new_buf = ::realloc(buf, new_size);
|
||||
if (nullptr == new_buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
{
|
||||
DB::throwFromErrno(
|
||||
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
|
||||
buf = new_buf;
|
||||
trace_alloc.onAlloc(buf, new_size);
|
||||
@ -154,46 +129,18 @@ public:
|
||||
if (new_size > old_size)
|
||||
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
|
||||
}
|
||||
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
|
||||
{
|
||||
/// Resize mmap'd memory region.
|
||||
auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
|
||||
trace_free.onFree(buf, old_size);
|
||||
|
||||
// On apple and freebsd self-implemented mremap used (common/mremap.h)
|
||||
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
|
||||
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
|
||||
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
trace_alloc.onAlloc(buf, new_size);
|
||||
}
|
||||
else if (new_size < MMAP_THRESHOLD)
|
||||
{
|
||||
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
|
||||
auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
|
||||
trace_free.onFree(buf, old_size);
|
||||
|
||||
void * new_buf = allocNoTrack(new_size, alignment);
|
||||
trace_alloc.onAlloc(buf, new_size);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
freeNoTrack(buf, old_size);
|
||||
buf = new_buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
|
||||
|
||||
void * new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
buf = new_buf;
|
||||
}
|
||||
|
||||
if constexpr (populate)
|
||||
prefaultPages(buf, new_size);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -205,83 +152,42 @@ protected:
|
||||
|
||||
static constexpr bool clear_memory = clear_memory_;
|
||||
|
||||
// Freshly mmapped pages are copy-on-write references to a global zero page.
|
||||
// On the first write, a page fault occurs, and an actual writable page is
|
||||
// allocated. If we are going to use this memory soon, such as when resizing
|
||||
// hash tables, it makes sense to pre-fault the pages by passing
|
||||
// MAP_POPULATE to mmap(). This takes some time, but should be faster
|
||||
// overall than having a hot loop interrupted by page faults.
|
||||
// It is only supported on Linux.
|
||||
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
|
||||
#if defined(OS_LINUX)
|
||||
| (mmap_populate ? MAP_POPULATE : 0)
|
||||
#endif
|
||||
;
|
||||
|
||||
private:
|
||||
void * allocNoTrack(size_t size, size_t alignment)
|
||||
{
|
||||
void * buf;
|
||||
size_t mmap_min_alignment = ::getPageSize();
|
||||
|
||||
if (size >= MMAP_THRESHOLD)
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if (alignment > mmap_min_alignment)
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Too large alignment {}: more than page size when allocating {}.",
|
||||
ReadableSize(alignment), ReadableSize(size));
|
||||
if constexpr (clear_memory)
|
||||
buf = ::calloc(size, 1);
|
||||
else
|
||||
buf = ::malloc(size);
|
||||
|
||||
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
|
||||
mmap_flags, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
|
||||
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
|
||||
if (nullptr == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if constexpr (clear_memory)
|
||||
buf = ::calloc(size, 1);
|
||||
else
|
||||
buf = ::malloc(size);
|
||||
buf = nullptr;
|
||||
int res = posix_memalign(&buf, alignment, size);
|
||||
|
||||
if (nullptr == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = nullptr;
|
||||
int res = posix_memalign(&buf, alignment, size);
|
||||
if (0 != res)
|
||||
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
|
||||
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
|
||||
|
||||
if (0 != res)
|
||||
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
|
||||
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
|
||||
|
||||
if constexpr (clear_memory)
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
if constexpr (clear_memory)
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
|
||||
if constexpr (populate)
|
||||
prefaultPages(buf, size);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
    void freeNoTrack(void * buf, size_t size)
    void freeNoTrack(void * buf)
    {
        if (size >= MMAP_THRESHOLD)
        {
            if (0 != munmap(buf, size))
                DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);

            CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
            CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
        }
        else
        {
            ::free(buf);
        }
        ::free(buf);
    }

    void checkSize(size_t size)
@ -291,20 +197,32 @@ private:
        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
    }

#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void * getMmapHint()
    /// Address passed to madvise is required to be aligned to the page boundary.
    auto adjustToPageSize(void * buf, size_t len, size_t page_size)
    {
        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
        const uintptr_t address_numeric = reinterpret_cast<uintptr_t>(buf);
        const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size;
        return std::make_pair(reinterpret_cast<void *>(next_page_start), len - (next_page_start - address_numeric));
    }
#else
    void * getMmapHint()

    void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
    {
        return nullptr;
    }
#if defined(MADV_POPULATE_WRITE)
        if (len_ < POPULATE_THRESHOLD)
            return;

        static const size_t page_size = ::getPageSize();
        if (len_ < page_size) /// Rounded address should be still within [buf, buf + len).
            return;

        auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
        if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
            LOG_TRACE(
                LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
                "Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
                errnoToString(res));
#endif
    }
};

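[Editor's note] The page-rounding arithmetic in adjustToPageSize above is easy to
check in isolation; a self-contained sketch with hypothetical values:

    #include <cstdint>

    /// Mirrors adjustToPageSize: round the start up to the next page boundary
    /// and shrink the length accordingly, so madvise() gets an aligned address.
    constexpr uint64_t nextPageStart(uint64_t addr, uint64_t page_size)
    {
        return (addr + page_size - 1) / page_size * page_size;
    }

    static_assert(nextPageStart(0x1003, 0x1000) == 0x2000); /// unaligned: rounds up
    static_assert(nextPageStart(0x2000, 0x1000) == 0x2000); /// aligned: unchanged
    /// For (buf = 0x1003, len = 0x3000): new start 0x2000, new len 0x3000 - 0xFFD = 0x2003.
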
@ -3,7 +3,7 @@
 * This file provides forward declarations for Allocator.
 */

template <bool clear_memory_, bool mmap_populate = false>
template <bool clear_memory_, bool populate = false>
class Allocator;

template <typename Base, size_t N = 64, size_t Alignment = 1>

@ -57,7 +57,7 @@ ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCou
        *waiter = this;
}

// Grant single slot to allocation, returns true iff more slot(s) are required
// Grant single slot to allocation returns true iff more slot(s) are required
bool ConcurrencyControl::Allocation::grant()
{
    std::unique_lock lock{mutex};

@ -175,8 +175,6 @@
    M(PartsInMemory, "In-memory parts.") \
    M(MMappedFiles, "Total number of mmapped files.") \
    M(MMappedFileBytes, "Sum size of mmapped file regions.") \
    M(MMappedAllocs, "Total number of mmapped allocations") \
    M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
    M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
    M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
    M(KafkaConsumers, "Number of active Kafka consumers") \

@ -97,6 +97,14 @@ ThreadGroupPtr CurrentThread::getGroup()
    return current_thread->getThreadGroup();
}

ContextPtr CurrentThread::getQueryContext()
{
    if (unlikely(!current_thread))
        return {};

    return current_thread->getQueryContext();
}

std::string_view CurrentThread::getQueryId()
{
    if (unlikely(!current_thread))

@ -86,6 +86,10 @@ public:
    static void finalizePerformanceCounters();

    /// Returns a non-empty string if the thread is attached to a query

    /// Returns attached query context
    static ContextPtr getQueryContext();

    static std::string_view getQueryId();

    /// Initializes query with current thread as master thread in constructor, and detaches it in destructor

@ -71,14 +71,14 @@ private:
    // Same as above but select different function overloads for zero saturation.
    STRONG_TYPEDEF(UInt32, LUTIndexWithSaturation)

    static inline LUTIndex normalizeLUTIndex(UInt32 index)
    static LUTIndex normalizeLUTIndex(UInt32 index)
    {
        if (index >= DATE_LUT_SIZE)
            return LUTIndex(DATE_LUT_SIZE - 1);
        return LUTIndex{index};
    }

    static inline LUTIndex normalizeLUTIndex(Int64 index)
    static LUTIndex normalizeLUTIndex(Int64 index)
    {
        if (unlikely(index < 0))
            return LUTIndex(0);
@ -88,59 +88,59 @@ private:
    }

    template <typename T>
    friend inline LUTIndex operator+(const LUTIndex & index, const T v)
    friend LUTIndex operator+(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() + UInt32(v));
    }

    template <typename T>
    friend inline LUTIndex operator+(const T v, const LUTIndex & index)
    friend LUTIndex operator+(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(static_cast<Int64>(v + index.toUnderType()));
    }

    friend inline LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
    friend LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
    {
        return normalizeLUTIndex(static_cast<UInt32>(index.toUnderType() + v.toUnderType()));
    }

    template <typename T>
    friend inline LUTIndex operator-(const LUTIndex & index, const T v)
    friend LUTIndex operator-(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - UInt32(v)));
    }

    template <typename T>
    friend inline LUTIndex operator-(const T v, const LUTIndex & index)
    friend LUTIndex operator-(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(static_cast<Int64>(v - index.toUnderType()));
    }

    friend inline LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
    friend LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
    {
        return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - v.toUnderType()));
    }

    template <typename T>
    friend inline LUTIndex operator*(const LUTIndex & index, const T v)
    friend LUTIndex operator*(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() * UInt32(v));
    }

    template <typename T>
    friend inline LUTIndex operator*(const T v, const LUTIndex & index)
    friend LUTIndex operator*(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(v * index.toUnderType());
    }

    template <typename T>
    friend inline LUTIndex operator/(const LUTIndex & index, const T v)
    friend LUTIndex operator/(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() / UInt32(v));
    }

    template <typename T>
    friend inline LUTIndex operator/(const T v, const LUTIndex & index)
    friend LUTIndex operator/(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(UInt32(v) / index.toUnderType());
    }
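
[Editor's note] Every operator above funnels its raw result through normalizeLUTIndex,
so index arithmetic saturates at the table bounds instead of wrapping. A self-contained
sketch of that clamping (kLutSize is an illustrative stand-in for DATE_LUT_SIZE):

    #include <cstdint>

    constexpr uint32_t kLutSize = 0x20000; /// stand-in for DATE_LUT_SIZE (assumption)

    constexpr uint32_t normalize(int64_t index)
    {
        if (index < 0)
            return 0;                /// clamp below, like the Int64 overload
        if (index >= kLutSize)
            return kLutSize - 1;     /// clamp above, like the UInt32 overload
        return static_cast<uint32_t>(index);
    }

    static_assert(normalize(-1) == 0);
    static_assert(normalize(kLutSize + 5) == kLutSize - 1);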
@ -172,12 +172,12 @@ public:
        Int8 amount_of_offset_change_value; /// Usually -4 or 4, but look at Lord Howe Island. Multiply by OffsetChangeFactor
        UInt8 time_at_offset_change_value; /// In seconds from beginning of the day. Multiply by OffsetChangeFactor

        inline Int32 amount_of_offset_change() const /// NOLINT
        Int32 amount_of_offset_change() const /// NOLINT
        {
            return static_cast<Int32>(amount_of_offset_change_value) * OffsetChangeFactor;
        }

        inline UInt32 time_at_offset_change() const /// NOLINT
        UInt32 time_at_offset_change() const /// NOLINT
        {
            return static_cast<UInt32>(time_at_offset_change_value) * OffsetChangeFactor;
        }
@ -221,7 +221,7 @@ private:
    /// Time zone name.
    std::string time_zone;

    inline LUTIndex findIndex(Time t) const
    LUTIndex findIndex(Time t) const
    {
        /// First guess.
        Time guess = (t / 86400) + daynum_offset_epoch;
@ -248,34 +248,34 @@ private:
        return LUTIndex(guess ? static_cast<unsigned>(guess) - 1 : 0);
    }

    static inline LUTIndex toLUTIndex(DayNum d)
    static LUTIndex toLUTIndex(DayNum d)
    {
        return normalizeLUTIndex(d + daynum_offset_epoch);
    }

    static inline LUTIndex toLUTIndex(ExtendedDayNum d)
    static LUTIndex toLUTIndex(ExtendedDayNum d)
    {
        return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
    }

    inline LUTIndex toLUTIndex(Time t) const
    LUTIndex toLUTIndex(Time t) const
    {
        return findIndex(t);
    }

    static inline LUTIndex toLUTIndex(LUTIndex i)
    static LUTIndex toLUTIndex(LUTIndex i)
    {
        return i;
    }

    template <typename DateOrTime>
    inline const Values & find(DateOrTime v) const
    const Values & find(DateOrTime v) const
    {
        return lut[toLUTIndex(v)];
    }

    template <typename DateOrTime, typename Divisor>
    inline DateOrTime roundDown(DateOrTime x, Divisor divisor) const
    DateOrTime roundDown(DateOrTime x, Divisor divisor) const
    {
        static_assert(std::is_integral_v<DateOrTime> && std::is_integral_v<Divisor>);
        assert(divisor > 0);
@ -336,7 +336,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toDayNum(DateOrTime v) const
    auto toDayNum(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return DayNum{static_cast<DayNum::UnderlyingType>(saturateMinus(toLUTIndex(v).toUnderType(), daynum_offset_epoch))};
@ -346,7 +346,7 @@ public:

    /// Round down to start of monday.
    template <typename DateOrTime>
    inline Time toFirstDayOfWeek(DateOrTime v) const
    Time toFirstDayOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -356,7 +356,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toFirstDayNumOfWeek(DateOrTime v) const
    auto toFirstDayNumOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -367,7 +367,7 @@ public:

    /// Round up to the last day of week.
    template <typename DateOrTime>
    inline Time toLastDayOfWeek(DateOrTime v) const
    Time toLastDayOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -377,7 +377,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toLastDayNumOfWeek(DateOrTime v) const
    auto toLastDayNumOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -388,7 +388,7 @@ public:

    /// Round down to start of month.
    template <typename DateOrTime>
    inline Time toFirstDayOfMonth(DateOrTime v) const
    Time toFirstDayOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -398,7 +398,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toFirstDayNumOfMonth(DateOrTime v) const
    auto toFirstDayNumOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -409,7 +409,7 @@ public:

    /// Round up to last day of month.
    template <typename DateOrTime>
    inline Time toLastDayOfMonth(DateOrTime v) const
    Time toLastDayOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -419,7 +419,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toLastDayNumOfMonth(DateOrTime v) const
    auto toLastDayNumOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -430,7 +430,7 @@ public:

    /// Round down to start of quarter.
    template <typename DateOrTime>
    inline auto toFirstDayNumOfQuarter(DateOrTime v) const
    auto toFirstDayNumOfQuarter(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayOfQuarterIndex(v)));
@ -439,7 +439,7 @@ public:
    }

    template <typename DateOrTime>
    inline LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
    LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
    {
        LUTIndex index = toLUTIndex(v);
        size_t month_inside_quarter = (lut[index].month - 1) % 3;
@ -455,25 +455,25 @@ public:
    }

    template <typename DateOrTime>
    inline Time toFirstDayOfQuarter(DateOrTime v) const
    Time toFirstDayOfQuarter(DateOrTime v) const
    {
        return toDate(toFirstDayOfQuarterIndex(v));
    }

    /// Round down to start of year.
    inline Time toFirstDayOfYear(Time t) const
    Time toFirstDayOfYear(Time t) const
    {
        return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date;
    }

    template <typename DateOrTime>
    inline LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
    LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
    {
        return years_lut[lut[toLUTIndex(v)].year - DATE_LUT_MIN_YEAR];
    }

    template <typename DateOrTime>
    inline auto toFirstDayNumOfYear(DateOrTime v) const
    auto toFirstDayNumOfYear(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfYearIndex(v)));
@ -481,14 +481,14 @@ public:
        return toDayNum(LUTIndex(toFirstDayNumOfYearIndex(v)));
    }

    inline Time toFirstDayOfNextMonth(Time t) const
    Time toFirstDayOfNextMonth(Time t) const
    {
        LUTIndex index = findIndex(t);
        index += 32 - lut[index].day_of_month;
        return lut[index - (lut[index].day_of_month - 1)].date;
    }

    inline Time toFirstDayOfPrevMonth(Time t) const
    Time toFirstDayOfPrevMonth(Time t) const
    {
        LUTIndex index = findIndex(t);
        index -= lut[index].day_of_month;
@ -496,13 +496,13 @@ public:
    }

    template <typename DateOrTime>
    inline UInt8 daysInMonth(DateOrTime value) const
    UInt8 daysInMonth(DateOrTime value) const
    {
        const LUTIndex i = toLUTIndex(value);
        return lut[i].days_in_month;
    }

    inline UInt8 daysInMonth(Int16 year, UInt8 month) const
    UInt8 daysInMonth(Int16 year, UInt8 month) const
    {
        UInt16 idx = year - DATE_LUT_MIN_YEAR;
        if (unlikely(idx >= DATE_LUT_YEARS))
@ -515,12 +515,12 @@ public:

    /** Round to start of day, then shift for specified amount of days.
      */
    inline Time toDateAndShift(Time t, Int32 days) const
    Time toDateAndShift(Time t, Int32 days) const
    {
        return lut[findIndex(t) + days].date;
    }

    inline Time toTime(Time t) const
    Time toTime(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -532,7 +532,7 @@ public:
        return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time.
    }

    inline unsigned toHour(Time t) const
    unsigned toHour(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -552,7 +552,7 @@ public:
      * then subtract the former from the latter to get the offset result.
      * The boundaries when meets DST(daylight saving time) change should be handled very carefully.
      */
    inline Time timezoneOffset(Time t) const
    Time timezoneOffset(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -574,7 +574,7 @@ public:
    }

    inline unsigned toSecond(Time t) const
    unsigned toSecond(Time t) const
    {
        if (likely(offset_is_whole_number_of_minutes_during_epoch))
        {
@ -593,7 +593,7 @@ public:
        return time % 60;
    }

    inline unsigned toMinute(Time t) const
    unsigned toMinute(Time t) const
    {
        if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
            return (t / 60) % 60;
@ -630,11 +630,11 @@ public:
      * because the same calendar day starts/ends at different timestamps in different time zones)
      */

    inline Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
    inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
    Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
    Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }

    template <typename DateOrTime>
    inline Time toDate(DateOrTime v) const
    Time toDate(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return lut_saturated[toLUTIndex(v)].date;
@ -643,20 +643,20 @@ public:
    }

    template <typename DateOrTime>
    inline UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
    UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }

    template <typename DateOrTime>
    inline UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
    UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }

    template <typename DateOrTime>
    inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
    Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }

    /// 1-based, starts on Monday
    template <typename DateOrTime>
    inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
    UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }

    template <typename DateOrTime>
    inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
    UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
    {
        WeekDayMode mode = check_week_day_mode(week_day_mode);

@ -674,10 +674,10 @@ public:
    }

    template <typename DateOrTime>
    inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
    UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }

    template <typename DateOrTime>
    inline UInt16 toDayOfYear(DateOrTime v) const
    UInt16 toDayOfYear(DateOrTime v) const
    {
        // TODO: different overload for ExtendedDayNum
        const LUTIndex i = toLUTIndex(v);
@ -688,7 +688,7 @@ public:
    /// (round down to monday and divide DayNum by 7; we made an assumption,
    /// that in domain of the function there was no weeks with any other number of days than 7)
    template <typename DateOrTime>
    inline Int32 toRelativeWeekNum(DateOrTime v) const
    Int32 toRelativeWeekNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        /// We add 8 to avoid underflow at beginning of unix epoch.
@ -697,7 +697,7 @@ public:

    /// Get year that contains most of the current week. Week begins at monday.
    template <typename DateOrTime>
    inline Int16 toISOYear(DateOrTime v) const
    Int16 toISOYear(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        /// That's effectively the year of thursday of current week.
@ -708,7 +708,7 @@ public:
    /// Example: ISO year 2019 begins at 2018-12-31. And ISO year 2017 begins at 2017-01-02.
    /// https://en.wikipedia.org/wiki/ISO_week_date
    template <typename DateOrTime>
    inline LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
    LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        auto iso_year = toISOYear(i);
@ -722,7 +722,7 @@ public:
    }

    template <typename DateOrTime>
    inline auto toFirstDayNumOfISOYear(DateOrTime v) const
    auto toFirstDayNumOfISOYear(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfISOYearIndex(v)));
@ -730,7 +730,7 @@ public:
        return toDayNum(LUTIndex(toFirstDayNumOfISOYearIndex(v)));
    }

    inline Time toFirstDayOfISOYear(Time t) const
    Time toFirstDayOfISOYear(Time t) const
    {
        return lut[toFirstDayNumOfISOYearIndex(t)].date;
    }
@ -738,7 +738,7 @@ public:
    /// ISO 8601 week number. Week begins at monday.
    /// The week number 1 is the first week in year that contains 4 or more days (that's more than half).
    template <typename DateOrTime>
    inline UInt8 toISOWeek(DateOrTime v) const
    UInt8 toISOWeek(DateOrTime v) const
    {
        return 1 + (toFirstDayNumOfWeek(v) - toDayNum(toFirstDayNumOfISOYearIndex(v))) / 7;
    }
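
[Editor's note] A worked instance of the formula above, using the fact from the
earlier comment that ISO year 2017 begins on 2017-01-02 (a Monday): for 2017-01-05,
toFirstDayNumOfWeek gives 2017-01-02, the difference is 0 days, and the result is
1 + 0 / 7 = week 1; for 2017-01-09 the difference is 7 days, giving week 2.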
@ -777,7 +777,7 @@ public:
       next week is week 1.
      */
    template <typename DateOrTime>
    inline YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
    YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
    {
        const bool newyear_day_mode = week_mode & static_cast<UInt8>(WeekModeFlag::NEWYEAR_DAY);
        week_mode = check_week_mode(week_mode);
@ -836,7 +836,7 @@ public:
    /// Calculate week number of WeekModeFlag::NEWYEAR_DAY mode
    /// The week number 1 is the first week in year that contains January 1,
    template <typename DateOrTime>
    inline YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
    YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
    {
        YearWeek yw(0, 0);
        UInt16 offset_day = monday_first_mode ? 0U : 1U;
@ -870,7 +870,7 @@ public:

    /// Get first day of week with week_mode, return Sunday or Monday
    template <typename DateOrTime>
    inline auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    {
        bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
        if (monday_first_mode)
@ -889,7 +889,7 @@ public:

    /// Get last day of week with week_mode, return Saturday or Sunday
    template <typename DateOrTime>
    inline auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    {
        bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
        if (monday_first_mode)
@ -908,7 +908,7 @@ public:
    }

    /// Check and change mode to effective.
    inline UInt8 check_week_mode(UInt8 mode) const /// NOLINT
    UInt8 check_week_mode(UInt8 mode) const /// NOLINT
    {
        UInt8 week_format = (mode & 7);
        if (!(week_format & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST)))
@ -917,7 +917,7 @@ public:
    }

    /// Check and change mode to effective.
    inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
    WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
    {
        return static_cast<WeekDayMode>(mode & 3);
    }
@ -926,7 +926,7 @@ public:
      * Returns 0 for monday, 1 for tuesday...
      */
    template <typename DateOrTime>
    inline UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
    UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
    {
        const LUTIndex i = toLUTIndex(v);
        if (!sunday_first_day_of_week)
@ -936,28 +936,28 @@ public:
    }

    /// Calculate days in one year.
    inline UInt16 calc_days_in_year(Int32 year) const /// NOLINT
    UInt16 calc_days_in_year(Int32 year) const /// NOLINT
    {
        return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
    }
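
[Editor's note] The dense expression above encodes the Gregorian leap-year rule:
divisible by 4, except centuries, unless divisible by 400. A quick compile-time
check of the same expression, as a sketch:

    constexpr unsigned daysInYear(int year)
    {
        return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year))) ? 366 : 365;
    }

    static_assert(daysInYear(2024) == 366); /// divisible by 4
    static_assert(daysInYear(1900) == 365); /// century, not divisible by 400
    static_assert(daysInYear(2000) == 366); /// divisible by 400
    static_assert(daysInYear(2023) == 365);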
    /// Number of month from some fixed moment in the past (year * 12 + month)
    template <typename DateOrTime>
    inline Int32 toRelativeMonthNum(DateOrTime v) const
    Int32 toRelativeMonthNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        return lut[i].year * 12 + lut[i].month;
    }

    template <typename DateOrTime>
    inline Int32 toRelativeQuarterNum(DateOrTime v) const
    Int32 toRelativeQuarterNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        return lut[i].year * 4 + (lut[i].month - 1) / 3;
    }

    /// We count all hour-length intervals, unrelated to offset changes.
    inline Time toRelativeHourNum(Time t) const
    Time toRelativeHourNum(Time t) const
    {
        if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
            return t / 3600;
@ -968,37 +968,37 @@ public:
    }

    template <typename DateOrTime>
    inline Time toRelativeHourNum(DateOrTime v) const
    Time toRelativeHourNum(DateOrTime v) const
    {
        return toRelativeHourNum(lut[toLUTIndex(v)].date);
    }

    /// The same formula is used for positive time (after Unix epoch) and negative time (before Unix epoch).
    /// It’s needed for correct work of dateDiff function.
    inline Time toStableRelativeHourNum(Time t) const
    Time toStableRelativeHourNum(Time t) const
    {
        return (t + DATE_LUT_ADD + 86400 - offset_at_start_of_epoch) / 3600 - (DATE_LUT_ADD / 3600);
    }

    template <typename DateOrTime>
    inline Time toStableRelativeHourNum(DateOrTime v) const
    Time toStableRelativeHourNum(DateOrTime v) const
    {
        return toStableRelativeHourNum(lut[toLUTIndex(v)].date);
    }

    inline Time toRelativeMinuteNum(Time t) const /// NOLINT
    Time toRelativeMinuteNum(Time t) const /// NOLINT
    {
        return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60);
    }

    template <typename DateOrTime>
    inline Time toRelativeMinuteNum(DateOrTime v) const
    Time toRelativeMinuteNum(DateOrTime v) const
    {
        return toRelativeMinuteNum(lut[toLUTIndex(v)].date);
    }

    template <typename DateOrTime>
    inline auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
    auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
    {
        if (years == 1)
            return toFirstDayNumOfYear(v);
@ -1019,7 +1019,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
    auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
    {
        if (quarters == 1)
            return toFirstDayNumOfQuarter(d);
@ -1028,7 +1028,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline auto toStartOfMonthInterval(Date d, UInt64 months) const
    auto toStartOfMonthInterval(Date d, UInt64 months) const
    {
        if (months == 1)
            return toFirstDayNumOfMonth(d);
@ -1042,7 +1042,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline auto toStartOfWeekInterval(Date d, UInt64 weeks) const
    auto toStartOfWeekInterval(Date d, UInt64 weeks) const
    {
        if (weeks == 1)
            return toFirstDayNumOfWeek(d);
@ -1056,7 +1056,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline Time toStartOfDayInterval(Date d, UInt64 days) const
    Time toStartOfDayInterval(Date d, UInt64 days) const
    {
        if (days == 1)
            return toDate(d);
@ -1152,7 +1152,7 @@ public:
        return static_cast<DateOrTime>(roundDown(t, seconds));
    }

    inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
    LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
            return LUTIndex(0);
@ -1167,7 +1167,7 @@ public:
    }

    /// Create DayNum from year, month, day of month.
    inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
    ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
    {
        if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
            return ExtendedDayNum(default_error_day_num);
@ -1175,14 +1175,14 @@ public:
        return toDayNum(makeLUTIndex(year, month, day_of_month));
    }

    inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
    Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        return lut[makeLUTIndex(year, month, day_of_month)].date;
    }

    /** Does not accept daylight saving time as argument: in case of ambiguity, it chooses the greater timestamp.
      */
    inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
    Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
    {
        size_t index = makeLUTIndex(year, month, day_of_month);
        Time time_offset = hour * 3600 + minute * 60 + second;
@ -1194,28 +1194,28 @@ public:
    }

    template <typename DateOrTime>
    inline const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }
    const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }

    template <typename DateOrTime>
    inline UInt32 toNumYYYYMM(DateOrTime v) const
    UInt32 toNumYYYYMM(DateOrTime v) const
    {
        const Values & values = getValues(v);
        return values.year * 100 + values.month;
    }

    template <typename DateOrTime>
    inline UInt32 toNumYYYYMMDD(DateOrTime v) const
    UInt32 toNumYYYYMMDD(DateOrTime v) const
    {
        const Values & values = getValues(v);
        return values.year * 10000 + values.month * 100 + values.day_of_month;
    }

    inline Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
    Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
    {
        return makeDate(num / 10000, num / 100 % 100, num % 100);
    }

    inline ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
    ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
    {
        return makeDayNum(num / 10000, num / 100 % 100, num % 100);
    }
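
[Editor's note] The YYYYMMDD packing produced by toNumYYYYMMDD and decoded by the two
functions above is plain positional arithmetic; for example, for num = 20240315:

    static_assert(20240315 / 10000 == 2024);     /// year
    static_assert(20240315 / 100 % 100 == 3);    /// month
    static_assert(20240315 % 100 == 15);         /// day of month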
@ -1241,13 +1241,13 @@ public:
        TimeComponents time;
    };

    inline DateComponents toDateComponents(Time t) const
    DateComponents toDateComponents(Time t) const
    {
        const Values & values = getValues(t);
        return { values.year, values.month, values.day_of_month };
    }

    inline DateTimeComponents toDateTimeComponents(Time t) const
    DateTimeComponents toDateTimeComponents(Time t) const
    {
        const LUTIndex index = findIndex(t);
        const Values & values = lut[index];
@ -1283,12 +1283,12 @@ public:
    }

    template <typename DateOrTime>
    inline DateTimeComponents toDateTimeComponents(DateOrTime v) const
    DateTimeComponents toDateTimeComponents(DateOrTime v) const
    {
        return toDateTimeComponents(lut[toLUTIndex(v)].date);
    }

    inline UInt64 toNumYYYYMMDDhhmmss(Time t) const
    UInt64 toNumYYYYMMDDhhmmss(Time t) const
    {
        DateTimeComponents components = toDateTimeComponents(t);

@ -1301,7 +1301,7 @@ public:
            + UInt64(components.date.year) * 10000000000;
    }

    inline Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
    Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
    {
        return makeDateTime(
            num / 10000000000,
@ -1315,7 +1315,7 @@ public:
    /// Adding calendar intervals.
    /// Implementation specific behaviour when delta is too big.

    inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
    NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
    {
        const LUTIndex index = findIndex(t);
        const Values & values = lut[index];
@ -1332,12 +1332,12 @@ public:
        return lut[new_index].date + time;
    }

    inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
    NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
    {
        return addDays(t, delta * 7);
    }

    inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
    UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        if (likely(day_of_month <= 28))
            return day_of_month;
@ -1351,7 +1351,7 @@ public:
    }

    template <typename DateOrTime>
    inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
    LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
    {
        const Values & values = lut[toLUTIndex(v)];

@ -1375,11 +1375,11 @@ public:
        }
    }

    /// If resulting month has less deys than source month, then saturation can happen.
    /// If resulting month has less days than source month, then saturation can happen.
    /// Example: 31 Aug + 1 month = 30 Sep.
    template <typename DateTime>
    requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
    inline Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
    Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
    {
        const auto result_day = addMonthsIndex(t, delta);

@ -1405,7 +1405,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
    auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
    {
        if constexpr (std::is_same_v<Date, DayNum>)
            return toDayNum(LUTIndexWithSaturation(addMonthsIndex(d, delta)));
@ -1414,13 +1414,13 @@ public:
    }

    template <typename DateOrTime>
    inline auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
    auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
    {
        return addMonths(d, delta * 3);
    }

    template <typename DateOrTime>
    inline LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
    LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
    {
        const Values & values = lut[toLUTIndex(v)];

@ -1438,7 +1438,7 @@ public:
    /// Saturation can occur if 29 Feb is mapped to non-leap year.
    template <typename DateTime>
    requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
    inline Time addYears(DateTime t, Int64 delta) const
    Time addYears(DateTime t, Int64 delta) const
    {
        auto result_day = addYearsIndex(t, delta);

@ -1464,7 +1464,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
    inline auto addYears(Date d, Int64 delta) const
    auto addYears(Date d, Int64 delta) const
    {
        if constexpr (std::is_same_v<Date, DayNum>)
            return toDayNum(LUTIndexWithSaturation(addYearsIndex(d, delta)));
@ -1473,7 +1473,7 @@ public:
    }

    inline std::string timeToString(Time t) const
    std::string timeToString(Time t) const
    {
        DateTimeComponents components = toDateTimeComponents(t);

@ -1498,7 +1498,7 @@ public:
        return s;
    }

    inline std::string dateToString(Time t) const
    std::string dateToString(Time t) const
    {
        const Values & values = getValues(t);

@ -1516,7 +1516,7 @@ public:
        return s;
    }

    inline std::string dateToString(ExtendedDayNum d) const
    std::string dateToString(ExtendedDayNum d) const
    {
        const Values & values = getValues(d);

@ -7,11 +7,19 @@
#include <filesystem>
#include <format>
#include <map>
#include <re2/re2.h>

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/replace.hpp>

#ifdef __clang__
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
#    pragma clang diagnostic pop
#endif

namespace fs = std::filesystem;

namespace DB

@ -2,7 +2,14 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>

#ifdef __clang__
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
#    pragma clang diagnostic pop
#endif

namespace DB
{

@ -8,7 +8,7 @@
 * table, so it makes sense to pre-fault the pages so that page faults don't
 * interrupt the resize loop. Set the allocator parameter accordingly.
 */
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
using HashTableAllocator = Allocator<true /* clear_memory */, true /* populate */>;

template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

@ -8,6 +8,9 @@
#    include <base/defines.h>
#    include <simdjson.h>
#    include "ElementTypes.h"
#    include <Common/PODArray_fwd.h>
#    include <Common/PODArray.h>
#    include <charconv>

namespace DB
{
@ -16,6 +19,254 @@ namespace ErrorCodes
    extern const int CANNOT_ALLOCATE_MEMORY;
}

/// Format elements of basic types into string.
/// The original implementation is mini_formatter in simdjson.h. But it is not public API, so we
/// add an implementation here.
class SimdJSONBasicFormatter
{
public:
    explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
    inline void comma() { oneChar(','); }
    /** Start an array, prints [ **/
    inline void startArray() { oneChar('['); }
    /** End an array, prints ] **/
    inline void endArray() { oneChar(']'); }
    /** Start an object, prints { **/
    inline void startObject() { oneChar('{'); }
    /** End an object, prints } **/
    inline void endObject() { oneChar('}'); }
    /** Prints a true **/
    inline void trueAtom()
    {
        const char * s = "true";
        buffer.insert(s, s + 4);
    }
    /** Prints a false **/
    inline void falseAtom()
    {
        const char * s = "false";
        buffer.insert(s, s + 5);
    }
    /** Prints a null **/
    inline void nullAtom()
    {
        const char * s = "null";
        buffer.insert(s, s + 4);
    }
    /** Prints a number **/
    inline void number(int64_t x)
    {
        char number_buffer[24];
        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
        buffer.insert(number_buffer, res.ptr);
    }
    /** Prints a number **/
    inline void number(uint64_t x)
    {
        char number_buffer[24];
        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
        buffer.insert(number_buffer, res.ptr);
    }
    /** Prints a number **/
    inline void number(double x)
    {
        char number_buffer[24];
        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
        buffer.insert(number_buffer, res.ptr);
    }
    /** Prints a key (string + colon) **/
    inline void key(std::string_view unescaped)
    {
        string(unescaped);
        oneChar(':');
    }
    /** Prints a string. The string is escaped as needed. **/
    inline void string(std::string_view unescaped)
    {
        oneChar('\"');
        size_t i = 0;
        // Fast path for the case where we have no control character, no ", and no backslash.
        // This should include most keys.
        //
        // We would like to use 'bool' but some compilers take offense to bitwise operation
        // with bool types.
        constexpr static char needs_escaping[] = {
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        for (; i + 8 <= unescaped.length(); i += 8)
        {
            // Poor man's vectorization. This could get much faster if we used SIMD.
            //
            // It is not the case that replacing '|' with '||' would be neutral performance-wise.
            if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
                | needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
                | needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
                | needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])])
            {
                break;
            }
        }
        for (; i < unescaped.length(); i++)
        {
            if (needs_escaping[uint8_t(unescaped[i])])
            {
                break;
            }
        }
        // The following is also possible and omits a 256-byte table, but it is slower:
        // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
        //     && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}

        // At least for long strings, the following should be fast. We could
        // do better by integrating the checks and the insertion.
        buffer.insert(unescaped.data(), unescaped.data() + i);
        // We caught a control character if we enter this loop (slow).
        // Note that we do not restart from the beginning, but rather we continue
        // from the point where we encountered something that requires escaping.
        for (; i < unescaped.length(); i++)
        {
            switch (unescaped[i])
            {
                case '\"': {
                    const char * s = "\\\"";
                    buffer.insert(s, s + 2);
                }
                break;
                case '\\': {
                    const char * s = "\\\\";
                    buffer.insert(s, s + 2);
                }
                break;
                default:
                    if (uint8_t(unescaped[i]) <= 0x1F)
                    {
                        // If packed, this uses 8 * 32 bytes.
                        // Note that we expect most compilers to embed this code in the data
                        // section.
                        constexpr static simdjson::escape_sequence escaped[32] = {
                            {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"},
                            {6, "\\u0007"}, {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"}, {2, "\\f"},     {2, "\\r"},
                            {6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"},
                            {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
                            {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
                        auto u = escaped[uint8_t(unescaped[i])];
                        buffer.insert(u.string, u.string + u.length);
                    }
                    else
                    {
                        oneChar(unescaped[i]);
                    }
            } // switch
        } // for
        oneChar('\"');
    }

    inline void oneChar(char c)
    {
        buffer.push_back(c);
    }
private:
    PaddedPODArray<UInt8> & buffer;

};

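[Editor's note] A hypothetical usage sketch (not part of the diff) of the formatter
defined above; the key/value names are illustrative only:

    PaddedPODArray<UInt8> out;
    SimdJSONBasicFormatter formatter(out);
    formatter.startObject();
    formatter.key("name");            /// writes "name":
    formatter.string("a\"b");         /// writes "a\"b" with the quote escaped
    formatter.comma();
    formatter.key("count");
    formatter.number(int64_t(42));
    formatter.endObject();            /// out now holds {"name":"a\"b","count":42}
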
/// Format object elements into string, element, array, object, kv-pair.
/// Similar to string_builder in simdjson.h.
class SimdJSONElementFormatter
{
public:
    explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
    /** Append an element to the builder (to be printed) **/
    inline void append(simdjson::dom::element value)
    {
        switch (value.type())
        {
            case simdjson::dom::element_type::UINT64: {
                format.number(value.get_uint64().value_unsafe());
                break;
            }
            case simdjson::dom::element_type::INT64: {
                format.number(value.get_int64().value_unsafe());
                break;
            }
            case simdjson::dom::element_type::DOUBLE: {
                format.number(value.get_double().value_unsafe());
                break;
            }
            case simdjson::dom::element_type::STRING: {
                format.string(value.get_string().value_unsafe());
                break;
            }
            case simdjson::dom::element_type::BOOL: {
                if (value.get_bool().value_unsafe())
                    format.trueAtom();
                else
                    format.falseAtom();
                break;
            }
            case simdjson::dom::element_type::NULL_VALUE: {
                format.nullAtom();
                break;
            }
            case simdjson::dom::element_type::ARRAY: {
                append(value.get_array().value_unsafe());
                break;
            }
            case simdjson::dom::element_type::OBJECT: {
                append(value.get_object().value_unsafe());
                break;
            }
        }
    }
    /** Append an array to the builder (to be printed) **/
    inline void append(simdjson::dom::array value)
    {
        format.startArray();
        auto iter = value.begin();
        auto end = value.end();
        if (iter != end)
        {
            append(*iter);
            for (++iter; iter != end; ++iter)
            {
                format.comma();
                append(*iter);
            }
        }
        format.endArray();
    }

    inline void append(simdjson::dom::object value)
    {
        format.startObject();
        auto pair = value.begin();
        auto end = value.end();
        if (pair != end)
        {
            append(*pair);
            for (++pair; pair != end; ++pair)
            {
                format.comma();
                append(*pair);
            }
        }
        format.endObject();
    }

    inline void append(simdjson::dom::key_value_pair kv)
    {
        format.key(kv.key);
        append(kv.value);
    }
private:
    SimdJSONBasicFormatter format;
};

/// This class can be used as an argument for the template class FunctionJSON.
/// It provides ability to parse JSONs using simdjson library.
struct SimdJSONParser

@ -441,8 +441,7 @@ finish:
    }
}

template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
void OptimizedRegularExpression::analyze(
    std::string_view regexp_,
    std::string & required_substring,
    bool & is_trivial,
@ -467,8 +466,7 @@ catch (...)
    LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false));
}

template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regexp_, int options)
{
    std::vector<std::string> alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);
@ -486,7 +484,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
    if (!is_trivial)
    {
        /// Compile the re2 regular expression.
        typename RegexType::Options regexp_options;
        typename re2::RE2::Options regexp_options;

        /// Never write error messages to stderr. It's ignorant to do it from library code.
        regexp_options.set_log_errors(false);
@ -497,7 +495,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
        if (is_dot_nl)
            regexp_options.set_dot_nl(true);

        re2 = std::make_unique<RegexType>(regexp_, regexp_options);
        re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
        if (!re2->ok())
        {
            throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
@ -527,8 +525,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
    }
}

template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept
OptimizedRegularExpression::OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept
    : is_trivial(rhs.is_trivial)
    , required_substring_is_prefix(rhs.required_substring_is_prefix)
    , is_case_insensitive(rhs.is_case_insensitive)
@ -545,8 +542,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(Opti
    }
}

template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size) const
{
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -577,13 +573,12 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
        }
    }

        return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
        return re2->Match({subject, subject_size}, 0, subject_size, re2::RE2::UNANCHORED, nullptr, 0);
    }
}

template <bool thread_safe>
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size, Match & match) const
{
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -624,7 +619,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si

        std::string_view piece;

        if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece))
        if (!re2::RE2::PartialMatch({subject, subject_size}, *re2, &piece))
            return false;
        else
        {
@ -636,8 +631,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
}

template <bool thread_safe>
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
unsigned OptimizedRegularExpression::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
{
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -695,7 +689,7 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
                {subject, subject_size},
                0,
                subject_size,
                RegexType::UNANCHORED,
                re2::RE2::UNANCHORED,
                pieces.data(),
                static_cast<int>(pieces.size())))
        {
@ -721,6 +715,3 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
    }
}
}

template class OptimizedRegularExpressionImpl<true>;
template class OptimizedRegularExpressionImpl<false>;

@ -6,9 +6,15 @@
#include <optional>
#include <Common/StringSearcher.h>
#include "config.h"
#include <re2/re2.h>
#include <re2_st/re2.h>

#ifdef __clang__
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
#    pragma clang diagnostic pop
#endif

/** Uses two ways to optimize a regular expression:
  * 1. If the regular expression is trivial (reduces to finding a substring in a string),
@ -37,8 +43,7 @@ namespace OptimizedRegularExpressionDetails
    };
}

template <bool thread_safe>
class OptimizedRegularExpressionImpl
class OptimizedRegularExpression
{
public:
    enum Options
@ -51,12 +56,10 @@ public:
    using Match = OptimizedRegularExpressionDetails::Match;
    using MatchVec = std::vector<Match>;

    using RegexType = std::conditional_t<thread_safe, re2::RE2, re2_st::RE2>;

    OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT
    OptimizedRegularExpression(const std::string & regexp_, int options = 0); /// NOLINT
    /// StringSearcher store pointers to required_substring, it must be updated on move.
    OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept;
    OptimizedRegularExpressionImpl(const OptimizedRegularExpressionImpl & rhs) = delete;
    OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept;
    OptimizedRegularExpression(const OptimizedRegularExpression & rhs) = delete;

    bool match(const std::string & subject) const
    {
@ -85,7 +88,7 @@ public:
    unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }

    /// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
    const std::unique_ptr<RegexType> & getRE2() const { return re2; }
    const std::unique_ptr<re2::RE2> & getRE2() const { return re2; }

    void getAnalyzeResult(std::string & out_required_substring, bool & out_is_trivial, bool & out_required_substring_is_prefix) const
    {
@ -110,9 +113,6 @@ private:
    std::string required_substring;
    std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
    std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
    std::unique_ptr<RegexType> re2;
    std::unique_ptr<re2::RE2> re2;
    unsigned number_of_subpatterns;
};

using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
using OptimizedRegularExpressionSingleThreaded = OptimizedRegularExpressionImpl<false>;

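[Editor's note] After this de-templating, existing call sites keep compiling
unchanged; a hypothetical usage sketch of the public interface shown above:

    OptimizedRegularExpression re("hello (\\w+)");
    OptimizedRegularExpression::MatchVec matches;
    /// Searches "hello world" (11 bytes); with limit 2 the whole match and the
    /// first capture group are reported, so matches[1] covers "world".
    unsigned parts = re.match("hello world", 11, matches, 2);
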
@ -253,6 +253,13 @@ The server successfully detected this situation and will download merged part fr
    M(PolygonsAddedToPool, "A polygon has been added to the cache (pool) for the 'pointInPolygon' function.") \
    M(PolygonsInPoolAllocatedBytes, "The number of bytes for polygons added to the cache (pool) for the 'pointInPolygon' function.") \
    \
    M(USearchAddCount, "Number of vectors added to usearch indexes.") \
    M(USearchAddVisitedMembers, "Number of nodes visited when adding vectors to usearch indexes.") \
    M(USearchAddComputedDistances, "Number of times distance was computed when adding vectors to usearch indexes.") \
    M(USearchSearchCount, "Number of search operations performed in usearch indexes.") \
    M(USearchSearchVisitedMembers, "Number of nodes visited when searching in usearch indexes.") \
    M(USearchSearchComputedDistances, "Number of times distance was computed when searching usearch indexes.") \
    \
    M(RWLockAcquiredReadLocks, "Number of times a read lock was acquired (in a heavy RWLock).") \
    M(RWLockAcquiredWriteLocks, "Number of times a write lock was acquired (in a heavy RWLock).") \
    M(RWLockReadersWaitMilliseconds, "Total time spent waiting for a read lock to be acquired (in a heavy RWLock).") \

@ -1,4 +1,3 @@
#include <re2/re2.h>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/RemoteHostFilter.h>
@ -6,6 +5,14 @@
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>

#ifdef __clang__
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
#include <re2/re2.h>
#ifdef __clang__
#    pragma clang diagnostic pop
#endif

namespace DB
{